/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 411 by ph10, Fri Apr 10 15:40:21 2009 UTC revision 842 by ph10, Sat Dec 31 15:19:04 2011 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 79  input mode under Windows. */ Line 90  input mode under Windows. */
90  #define fileno _fileno  #define fileno _fileno
91  #endif  #endif
92    
93    /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95    #ifdef __BORLANDC__
96    #define _setmode(handle, mode) setmode(handle, mode)
97    #endif
98    
99    /* Not Windows */
100    
101  #else  #else
102  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
103  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 97  here before pcre_internal.h so that the Line 116  here before pcre_internal.h so that the
116  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
117    
118  #include "pcre.h"  #include "pcre.h"
119    
120    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121    /* Configure internal macros to 16 bit mode. */
122    #define COMPILE_PCRE16
123    #endif
124    
125  #include "pcre_internal.h"  #include "pcre_internal.h"
126    
127    /* The pcre_printint() function, which prints the internal form of a compiled
128    regex, is held in a separate file so that (a) it can be compiled in either
129    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130    when that is compiled in debug mode. */
131    
132    #ifdef SUPPORT_PCRE8
133    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134    #endif
135    #ifdef SUPPORT_PCRE16
136    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137    #endif
138    
139  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
140  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
141  external symbols to prevent clashes. */  external symbols to prevent clashes. */
142    
143  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
144  #define _pcre_utf8_table1      utf8_table1  #undef PRIV
145  #define _pcre_utf8_table1_size utf8_table1_size  #define PRIV(name) name
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
146    
147  #include "pcre_tables.c"  #include "pcre_tables.c"
148    
149  /* We also need the pcre_printint() function for printing out compiled  /* The definition of the macro PRINTABLE, which determines whether to print an
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled.  
   
 The definition of the macro PRINTABLE, which determines whether to print an  
150  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
151  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
152  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
153  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
154    
155  #include "pcre_printint.src"  #ifdef EBCDIC
156    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157    #else
158    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
159    #endif
160    
161  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162    
163    /* Posix support is disabled in 16 bit only mode. */
164    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165    #define NOPOSIX
166    #endif
167    
168  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
169  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 139  Makefile. */ Line 173  Makefile. */
173  #include "pcreposix.h"  #include "pcreposix.h"
174  #endif  #endif
175    
176  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
177  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
179  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
180  UTF8 support if PCRE is built without it. */  
181    #ifndef SUPPORT_UTF
182  #ifndef SUPPORT_UTF8  #ifndef NOUTF
183  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
184  #endif  #endif
185  #endif  #endif
186    
187    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189    only from one place and is handled differently). I couldn't dream up any way of
190    using a single macro to do this in a generic way, because of the many different
191    argument requirements. We know that at least one of SUPPORT_PCRE8 and
192    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193    use these in the definitions of generic macros.
194    
195    **** Special note about the PCHARSxxx macros: the address of the string to be
196    printed is always given as two arguments: a base address followed by an offset.
197    The base address is cast to the correct data size for 8 or 16 bit data; the
198    offset is in units of this size. If the string were given as base+offset in one
199    argument, the casting might be incorrectly applied. */
200    
201    #ifdef SUPPORT_PCRE8
202    
203    #define PCHARS8(lv, p, offset, len, f) \
204      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205    
206    #define PCHARSV8(p, offset, len, f) \
207      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208    
209    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210      p = read_capture_name8(p, cn8, re)
211    
212    #define SET_PCRE_CALLOUT8(callout) \
213      pcre_callout = callout
214    
215    #define STRLEN8(p) ((int)strlen((char *)p))
216    
217    
218    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
219      re = pcre_compile((char *)pat, options, error, erroffset, tables)
220    
221    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
222        namesptr, cbuffer, size) \
223      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
224        (char *)namesptr, cbuffer, size)
225    
226    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
227      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
228    
229    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
230        offsets, size_offsets, workspace, size_workspace) \
231      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
232        offsets, size_offsets, workspace, size_workspace)
233    
234    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
235        offsets, size_offsets) \
236      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
237        offsets, size_offsets)
238    
239    #define PCRE_FREE_STUDY8(extra) \
240      pcre_free_study(extra)
241    
242    #define PCRE_FREE_SUBSTRING8(substring) \
243      pcre_free_substring(substring)
244    
245    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
246      pcre_free_substring_list(listptr)
247    
248    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
249        getnamesptr, subsptr) \
250      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
251        (char *)getnamesptr, subsptr)
252    
253    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
254      n = pcre_get_stringnumber(re, (char *)ptr)
255    
256    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
257      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
258    
259    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
260      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
261    
262    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
263      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
264    
265    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
266      pcre_printint(re, outfile, debug_lengths)
267    
268    #define PCRE_STUDY8(extra, re, options, error) \
269      extra = pcre_study(re, options, error)
270    
271    #endif /* SUPPORT_PCRE8 */
272    
273    /* -----------------------------------------------------------*/
274    
275    #ifdef SUPPORT_PCRE16
276    
277    #define PCHARS16(lv, p, offset, len, f) \
278      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
279    
280    #define PCHARSV16(p, offset, len, f) \
281      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
282    
283    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
284      p = read_capture_name16(p, cn16, re)
285    
286    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
287    
288    #define SET_PCRE_CALLOUT16(callout) \
289      pcre16_callout = callout
290    
291    
292    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
293      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
294    
295    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
296        namesptr, cbuffer, size) \
297      rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
298        (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, size/2)
299    
300    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
301      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
302        (PCRE_SCHAR16 *)cbuffer, size/2)
303    
304    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
305        offsets, size_offsets, workspace, size_workspace) \
306      count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
307        options, offsets, size_offsets, workspace, size_workspace)
308    
309    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
310        offsets, size_offsets) \
311      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
312        options, offsets, size_offsets)
313    
314    #define PCRE_FREE_STUDY16(extra) \
315      pcre16_free_study(extra)
316    
317    #define PCRE_FREE_SUBSTRING16(substring) \
318      pcre16_free_substring((PCRE_SPTR16)substring)
319    
320    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
321      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
322    
323    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
324        getnamesptr, subsptr) \
325      rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
326        (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
327    
328    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
329      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
330    
331    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
332      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
333        (PCRE_SPTR16 *)(void*)subsptr)
334    
335    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
336      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
337        (PCRE_SPTR16 **)(void*)listptr)
338    
339    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
340      rc = pcre16_pattern_to_host_byte_order(re, extra, tables)
341    
342    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
343      pcre16_printint(re, outfile, debug_lengths)
344    
345    #define PCRE_STUDY16(extra, re, options, error) \
346      extra = pcre16_study(re, options, error)
347    
348    #endif /* SUPPORT_PCRE16 */
349    
350    
351    /* ----- Both modes are supported; a runtime test is needed, except for
352    pcre_config(), and the JIT stack functions, when it doesn't matter which
353    version is called. ----- */
354    
355    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
356    
357    #define CHAR_SIZE (use_pcre16? 2:1)
358    
359    #define PCHARS(lv, p, offset, len, f) \
360      if (use_pcre16) \
361        PCHARS16(lv, p, offset, len, f); \
362      else \
363        PCHARS8(lv, p, offset, len, f)
364    
365    #define PCHARSV(p, offset, len, f) \
366      if (use_pcre16) \
367        PCHARSV16(p, offset, len, f); \
368      else \
369        PCHARSV8(p, offset, len, f)
370    
371    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
372      if (use_pcre16) \
373        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
374      else \
375        READ_CAPTURE_NAME8(p, cn8, cn16, re)
376    
377    #define SET_PCRE_CALLOUT(callout) \
378      if (use_pcre16) \
379        SET_PCRE_CALLOUT16(callout); \
380      else \
381        SET_PCRE_CALLOUT8(callout)
382    
383    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
384    
385    #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
386    
387    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
388      if (use_pcre16) \
389        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
390      else \
391        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
392    
393    #define PCRE_CONFIG pcre_config
394    
395    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
396        namesptr, cbuffer, size) \
397      if (use_pcre16) \
398        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
399          namesptr, cbuffer, size); \
400      else \
401        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
402          namesptr, cbuffer, size)
403    
404    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
405      if (use_pcre16) \
406        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
407      else \
408        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
409    
410    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
411        offsets, size_offsets, workspace, size_workspace) \
412      if (use_pcre16) \
413        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
414          offsets, size_offsets, workspace, size_workspace); \
415      else \
416        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
417          offsets, size_offsets, workspace, size_workspace)
418    
419    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
420        offsets, size_offsets) \
421      if (use_pcre16) \
422        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
423          offsets, size_offsets); \
424      else \
425        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
426          offsets, size_offsets)
427    
428    #define PCRE_FREE_STUDY(extra) \
429      if (use_pcre16) \
430        PCRE_FREE_STUDY16(extra); \
431      else \
432        PCRE_FREE_STUDY8(extra)
433    
434    #define PCRE_FREE_SUBSTRING(substring) \
435      if (use_pcre16) \
436        PCRE_FREE_SUBSTRING16(substring); \
437      else \
438        PCRE_FREE_SUBSTRING8(substring)
439    
440    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
441      if (use_pcre16) \
442        PCRE_FREE_SUBSTRING_LIST16(listptr); \
443      else \
444        PCRE_FREE_SUBSTRING_LIST8(listptr)
445    
446    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
447        getnamesptr, subsptr) \
448      if (use_pcre16) \
449        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
450          getnamesptr, subsptr); \
451      else \
452        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
453          getnamesptr, subsptr)
454    
455    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
456      if (use_pcre16) \
457        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
458      else \
459        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
460    
461    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
462      if (use_pcre16) \
463        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
464      else \
465        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
466    
467    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
468      if (use_pcre16) \
469        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
470      else \
471        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
472    
473    #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
474    #define PCRE_JIT_STACK_FREE pcre_jit_stack_free
475    
476    #define PCRE_MAKETABLES \
477      (use_pcre16? pcre16_maketables() : pcre_maketables())
478    
479    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
480      if (use_pcre16) \
481        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
482      else \
483        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
484    
485    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
486      if (use_pcre16) \
487        PCRE_PRINTINT16(re, outfile, debug_lengths); \
488      else \
489        PCRE_PRINTINT8(re, outfile, debug_lengths)
490    
491    #define PCRE_STUDY(extra, re, options, error) \
492      if (use_pcre16) \
493        PCRE_STUDY16(extra, re, options, error); \
494      else \
495        PCRE_STUDY8(extra, re, options, error)
496    
497    /* ----- Only 8-bit mode is supported ----- */
498    
499    #elif defined SUPPORT_PCRE8
500    #define CHAR_SIZE                 1
501    #define PCHARS                    PCHARS8
502    #define PCHARSV                   PCHARSV8
503    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
504    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
505    #define STRLEN                    STRLEN8
506    #define PCRE_ASSIGN_JIT_STACK     pcre_assign_jit_stack
507    #define PCRE_COMPILE              PCRE_COMPILE8
508    #define PCRE_CONFIG               pcre_config
509    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
510    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
511    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
512    #define PCRE_EXEC                 PCRE_EXEC8
513    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
514    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
515    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
516    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
517    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
518    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
519    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
520    #define PCRE_JIT_STACK_ALLOC      pcre_jit_stack_alloc
521    #define PCRE_JIT_STACK_FREE       pcre_jit_stack_free
522    #define PCRE_MAKETABLES           pcre_maketables()
523    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
524    #define PCRE_PRINTINT             PCRE_PRINTINT8
525    #define PCRE_STUDY                PCRE_STUDY8
526    
527    /* ----- Only 16-bit mode is supported ----- */
528    
529    #else
530    #define CHAR_SIZE                 2
531    #define PCHARS                    PCHARS16
532    #define PCHARSV                   PCHARSV16
533    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
534    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
535    #define STRLEN                    STRLEN16
536    #define PCRE_ASSIGN_JIT_STACK     pcre16_assign_jit_stack
537    #define PCRE_COMPILE              PCRE_COMPILE16
538    #define PCRE_CONFIG               pcre16_config
539    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
540    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
541    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
542    #define PCRE_EXEC                 PCRE_EXEC16
543    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
544    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
545    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
546    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
547    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
548    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
549    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
550    #define PCRE_JIT_STACK_ALLOC      pcre16_jit_stack_alloc
551    #define PCRE_JIT_STACK_FREE       pcre16_jit_stack_free
552    #define PCRE_MAKETABLES           pcre16_maketables()
553    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
554    #define PCRE_PRINTINT             PCRE_PRINTINT16
555    #define PCRE_STUDY                PCRE_STUDY16
556    #endif
557    
558    /* ----- End of mode-specific function call macros ----- */
559    
560    
561  /* Other parameters */  /* Other parameters */
562    
# Line 178  static int debug_lengths; Line 584  static int debug_lengths;
584  static int first_callout;  static int first_callout;
585  static int locale_set = 0;  static int locale_set = 0;
586  static int show_malloc;  static int show_malloc;
587  static int use_utf8;  static int use_utf;
588  static size_t gotten_store;  static size_t gotten_store;
589    static size_t first_gotten_store = 0;
590    static const unsigned char *last_callout_mark = NULL;
591    
592  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
593    
594  static int buffer_size = 50000;  static int buffer_size = 50000;
595  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
596  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
597  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
598    
599    /* Another buffer is needed for translation to 16-bit character strings. It will
600    be obtained and extended as required. */
601    
602    #ifdef SUPPORT_PCRE16
603    static int buffer16_size = 0;
604    static pcre_uint16 *buffer16 = NULL;
605    
606    #ifdef SUPPORT_PCRE8
607    
608    /* We need the table of operator lengths that is used for 16-bit compiling, in
609    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
610    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
611    appropriately for the 16-bit world. Just as a safety check, make sure that
612    COMPILE_PCRE16 is *not* set. */
613    
614    #ifdef COMPILE_PCRE16
615    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
616    #endif
617    
618    #if LINK_SIZE == 2
619    #undef LINK_SIZE
620    #define LINK_SIZE 1
621    #elif LINK_SIZE == 3 || LINK_SIZE == 4
622    #undef LINK_SIZE
623    #define LINK_SIZE 2
624    #else
625    #error LINK_SIZE must be either 2, 3, or 4
626    #endif
627    
628    #undef IMM2_SIZE
629    #define IMM2_SIZE 1
630    
631    #endif /* SUPPORT_PCRE8 */
632    
633    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
634    #endif  /* SUPPORT_PCRE16 */
635    
636    /* If we have 8-bit support, default use_pcre16 to false; if there is also
637    16-bit support, it can be changed by an option. If there is no 8-bit support,
638    there must be 16-bit support, so default it to 1. */
639    
640    #ifdef SUPPORT_PCRE8
641    static int use_pcre16 = 0;
642    #else
643    static int use_pcre16 = 1;
644    #endif
645    
646    /* Textual explanations for runtime error codes */
647    
648    static const char *errtexts[] = {
649      NULL,  /* 0 is no error */
650      NULL,  /* NOMATCH is handled specially */
651      "NULL argument passed",
652      "bad option value",
653      "magic number missing",
654      "unknown opcode - pattern overwritten?",
655      "no more memory",
656      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
657      "match limit exceeded",
658      "callout error code",
659      NULL,  /* BADUTF8/16 is handled specially */
660      NULL,  /* BADUTF8/16 offset is handled specially */
661      NULL,  /* PARTIAL is handled specially */
662      "not used - internal error",
663      "internal error - pattern overwritten?",
664      "bad count value",
665      "item unsupported for DFA matching",
666      "backreference condition or recursion test not supported for DFA matching",
667      "match limit not supported for DFA matching",
668      "workspace size exceeded in DFA matching",
669      "too much recursion for DFA matching",
670      "recursion limit exceeded",
671      "not used - internal error",
672      "invalid combination of newline options",
673      "bad offset value",
674      NULL,  /* SHORTUTF8/16 is handled specially */
675      "nested recursion at the same subject position",
676      "JIT stack limit reached",
677      "pattern compiled in wrong mode: 8-bit/16-bit error"
678    };
679    
680    
681    /*************************************************
682    *         Alternate character tables             *
683    *************************************************/
684    
685    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
686    using the default tables of the library. However, the T option can be used to
687    select alternate sets of tables, for different kinds of testing. Note that
688    the L (locale) option also adjusts the tables. */
689    
690    /* This is the set of tables distributed as default with PCRE. It recognizes
691    only ASCII characters. */
692    
693    static const pcre_uint8 tables0[] = {
694    
695    /* This table is a lower casing table. */
696    
697        0,  1,  2,  3,  4,  5,  6,  7,
698        8,  9, 10, 11, 12, 13, 14, 15,
699       16, 17, 18, 19, 20, 21, 22, 23,
700       24, 25, 26, 27, 28, 29, 30, 31,
701       32, 33, 34, 35, 36, 37, 38, 39,
702       40, 41, 42, 43, 44, 45, 46, 47,
703       48, 49, 50, 51, 52, 53, 54, 55,
704       56, 57, 58, 59, 60, 61, 62, 63,
705       64, 97, 98, 99,100,101,102,103,
706      104,105,106,107,108,109,110,111,
707      112,113,114,115,116,117,118,119,
708      120,121,122, 91, 92, 93, 94, 95,
709       96, 97, 98, 99,100,101,102,103,
710      104,105,106,107,108,109,110,111,
711      112,113,114,115,116,117,118,119,
712      120,121,122,123,124,125,126,127,
713      128,129,130,131,132,133,134,135,
714      136,137,138,139,140,141,142,143,
715      144,145,146,147,148,149,150,151,
716      152,153,154,155,156,157,158,159,
717      160,161,162,163,164,165,166,167,
718      168,169,170,171,172,173,174,175,
719      176,177,178,179,180,181,182,183,
720      184,185,186,187,188,189,190,191,
721      192,193,194,195,196,197,198,199,
722      200,201,202,203,204,205,206,207,
723      208,209,210,211,212,213,214,215,
724      216,217,218,219,220,221,222,223,
725      224,225,226,227,228,229,230,231,
726      232,233,234,235,236,237,238,239,
727      240,241,242,243,244,245,246,247,
728      248,249,250,251,252,253,254,255,
729    
730    /* This table is a case flipping table. */
731    
732        0,  1,  2,  3,  4,  5,  6,  7,
733        8,  9, 10, 11, 12, 13, 14, 15,
734       16, 17, 18, 19, 20, 21, 22, 23,
735       24, 25, 26, 27, 28, 29, 30, 31,
736       32, 33, 34, 35, 36, 37, 38, 39,
737       40, 41, 42, 43, 44, 45, 46, 47,
738       48, 49, 50, 51, 52, 53, 54, 55,
739       56, 57, 58, 59, 60, 61, 62, 63,
740       64, 97, 98, 99,100,101,102,103,
741      104,105,106,107,108,109,110,111,
742      112,113,114,115,116,117,118,119,
743      120,121,122, 91, 92, 93, 94, 95,
744       96, 65, 66, 67, 68, 69, 70, 71,
745       72, 73, 74, 75, 76, 77, 78, 79,
746       80, 81, 82, 83, 84, 85, 86, 87,
747       88, 89, 90,123,124,125,126,127,
748      128,129,130,131,132,133,134,135,
749      136,137,138,139,140,141,142,143,
750      144,145,146,147,148,149,150,151,
751      152,153,154,155,156,157,158,159,
752      160,161,162,163,164,165,166,167,
753      168,169,170,171,172,173,174,175,
754      176,177,178,179,180,181,182,183,
755      184,185,186,187,188,189,190,191,
756      192,193,194,195,196,197,198,199,
757      200,201,202,203,204,205,206,207,
758      208,209,210,211,212,213,214,215,
759      216,217,218,219,220,221,222,223,
760      224,225,226,227,228,229,230,231,
761      232,233,234,235,236,237,238,239,
762      240,241,242,243,244,245,246,247,
763      248,249,250,251,252,253,254,255,
764    
765    /* This table contains bit maps for various character classes. Each map is 32
766    bytes long and the bits run from the least significant end of each byte. The
767    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
768    graph, print, punct, and cntrl. Other classes are built from combinations. */
769    
770      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
771      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
772      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
773      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
774    
775      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
776      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
777      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
778      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779    
780      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
781      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
782      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
783      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
784    
785      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
786      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
787      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
788      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
789    
790      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
791      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
792      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
793      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
794    
795      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
796      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
797      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
798      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
799    
800      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
801      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
802      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804    
805      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
806      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
807      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
808      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809    
810      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
811      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
812      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814    
815      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
816      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
817      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819    
820    /* This table identifies various classes of character by individual bits:
821      0x01   white space character
822      0x02   letter
823      0x04   decimal digit
824      0x08   hexadecimal digit
825      0x10   alphanumeric or '_'
826      0x80   regular expression metacharacter or binary zero
827    */
828    
829      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
830      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
831      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
832      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
833      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
834      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
835      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
836      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
837      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
838      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
839      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
840      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
841      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
842      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
843      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
844      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
845      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
846      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
847      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
850      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
851      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
852      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
854      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
855      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
856      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
857      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
858      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
859      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
860      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
861    
862    /* This is a set of tables that came originally from a Windows user. It seems to
863    be at least an approximation of ISO 8859. In particular, there are characters
864    greater than 128 that are marked as spaces, letters, etc. */
865    
866    static const pcre_uint8 tables1[] = {
867    0,1,2,3,4,5,6,7,   /* first 256 bytes: lower-casing table (A-Z -> a-z; 192-222 except 215 -> 224-254) */
868    8,9,10,11,12,13,14,15,
869    16,17,18,19,20,21,22,23,
870    24,25,26,27,28,29,30,31,
871    32,33,34,35,36,37,38,39,
872    40,41,42,43,44,45,46,47,
873    48,49,50,51,52,53,54,55,
874    56,57,58,59,60,61,62,63,
875    64,97,98,99,100,101,102,103,
876    104,105,106,107,108,109,110,111,
877    112,113,114,115,116,117,118,119,
878    120,121,122,91,92,93,94,95,
879    96,97,98,99,100,101,102,103,
880    104,105,106,107,108,109,110,111,
881    112,113,114,115,116,117,118,119,
882    120,121,122,123,124,125,126,127,
883    128,129,130,131,132,133,134,135,
884    136,137,138,139,140,141,142,143,
885    144,145,146,147,148,149,150,151,
886    152,153,154,155,156,157,158,159,
887    160,161,162,163,164,165,166,167,
888    168,169,170,171,172,173,174,175,
889    176,177,178,179,180,181,182,183,
890    184,185,186,187,188,189,190,191,
891    224,225,226,227,228,229,230,231,
892    232,233,234,235,236,237,238,239,
893    240,241,242,243,244,245,246,215,
894    248,249,250,251,252,253,254,223,
895    224,225,226,227,228,229,230,231,
896    232,233,234,235,236,237,238,239,
897    240,241,242,243,244,245,246,247,
898    248,249,250,251,252,253,254,255,
899    0,1,2,3,4,5,6,7,   /* next 256 bytes: case-flipping table (upper <-> lower, including 192-222 <-> 224-254) */
900    8,9,10,11,12,13,14,15,
901    16,17,18,19,20,21,22,23,
902    24,25,26,27,28,29,30,31,
903    32,33,34,35,36,37,38,39,
904    40,41,42,43,44,45,46,47,
905    48,49,50,51,52,53,54,55,
906    56,57,58,59,60,61,62,63,
907    64,97,98,99,100,101,102,103,
908    104,105,106,107,108,109,110,111,
909    112,113,114,115,116,117,118,119,
910    120,121,122,91,92,93,94,95,
911    96,65,66,67,68,69,70,71,
912    72,73,74,75,76,77,78,79,
913    80,81,82,83,84,85,86,87,
914    88,89,90,123,124,125,126,127,
915    128,129,130,131,132,133,134,135,
916    136,137,138,139,140,141,142,143,
917    144,145,146,147,148,149,150,151,
918    152,153,154,155,156,157,158,159,
919    160,161,162,163,164,165,166,167,
920    168,169,170,171,172,173,174,175,
921    176,177,178,179,180,181,182,183,
922    184,185,186,187,188,189,190,191,
923    224,225,226,227,228,229,230,231,
924    232,233,234,235,236,237,238,239,
925    240,241,242,243,244,245,246,215,
926    248,249,250,251,252,253,254,223,
927    192,193,194,195,196,197,198,199,
928    200,201,202,203,204,205,206,207,
929    208,209,210,211,212,213,214,247,
930    216,217,218,219,220,221,222,255,
931    0,62,0,0,1,0,0,0,   /* next 320 bytes: ten 32-byte class bit maps; presumably in the same class order as the default tables - confirm */
932    0,0,0,0,0,0,0,0,
933    32,0,0,0,1,0,0,0,
934    0,0,0,0,0,0,0,0,
935    0,0,0,0,0,0,255,3,
936    126,0,0,0,126,0,0,0,
937    0,0,0,0,0,0,0,0,
938    0,0,0,0,0,0,0,0,
939    0,0,0,0,0,0,255,3,
940    0,0,0,0,0,0,0,0,
941    0,0,0,0,0,0,12,2,
942    0,0,0,0,0,0,0,0,
943    0,0,0,0,0,0,0,0,
944    254,255,255,7,0,0,0,0,
945    0,0,0,0,0,0,0,0,
946    255,255,127,127,0,0,0,0,
947    0,0,0,0,0,0,0,0,
948    0,0,0,0,254,255,255,7,
949    0,0,0,0,0,4,32,4,
950    0,0,0,128,255,255,127,255,
951    0,0,0,0,0,0,255,3,
952    254,255,255,135,254,255,255,7,
953    0,0,0,0,0,4,44,6,
954    255,255,127,255,255,255,127,255,
955    0,0,0,0,254,255,255,255,
956    255,255,255,255,255,255,255,127,
957    0,0,0,0,254,255,255,255,
958    255,255,255,255,255,255,255,255,
959    0,2,0,0,255,255,255,255,
960    255,255,255,255,255,255,255,127,
961    0,0,0,0,255,255,255,255,
962    255,255,255,255,255,255,255,255,
963    0,0,0,0,254,255,0,252,
964    1,0,0,248,1,0,0,120,
965    0,0,0,0,254,255,255,255,
966    0,0,128,0,0,0,128,0,
967    255,255,255,255,0,0,0,0,
968    0,0,0,0,0,0,0,128,
969    255,255,255,255,0,0,0,0,
970    0,0,0,0,0,0,0,0,
971    128,0,0,0,0,0,0,0,   /* last 256 bytes: per-character type flags; presumably the same bit meanings as the default tables (18 = 0x12, 28 = 0x1c) - confirm */
972    0,1,1,0,1,1,0,0,
973    0,0,0,0,0,0,0,0,
974    0,0,0,0,0,0,0,0,
975    1,0,0,0,128,0,0,0,
976    128,128,128,128,0,0,128,0,
977    28,28,28,28,28,28,28,28,
978    28,28,0,0,0,0,0,128,
979    0,26,26,26,26,26,26,18,
980    18,18,18,18,18,18,18,18,
981    18,18,18,18,18,18,18,18,
982    18,18,18,128,128,0,128,16,
983    0,26,26,26,26,26,26,18,
984    18,18,18,18,18,18,18,18,
985    18,18,18,18,18,18,18,18,
986    18,18,18,128,128,0,0,0,
987    0,0,0,0,0,1,0,0,
988    0,0,0,0,0,0,0,0,
989    0,0,0,0,0,0,0,0,
990    0,0,0,0,0,0,0,0,
991    1,0,0,0,0,0,0,0,
992    0,0,18,0,0,0,0,0,
993    0,0,20,20,0,18,0,0,
994    0,20,18,0,0,0,0,0,
995    18,18,18,18,18,18,18,18,
996    18,18,18,18,18,18,18,18,
997    18,18,18,18,18,18,18,0,
998    18,18,18,18,18,18,18,18,
999    18,18,18,18,18,18,18,18,
1000    18,18,18,18,18,18,18,18,
1001    18,18,18,18,18,18,18,0,
1002    18,18,18,18,18,18,18,18
1003    };
1004    
1005    
1006    
1007    
1008    #ifndef HAVE_STRERROR
1009    /*************************************************
1010    *     Provide strerror() for non-ANSI libraries  *
1011    *************************************************/
1012    
1013    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1014    in their libraries, but can provide the same facility by this simple
1015    alternative function. */
1016    
1017    extern int   sys_nerr;
1018    extern char *sys_errlist[];
1019    
1020    char *
1021    strerror(int n)   /* replacement compiled only when HAVE_STRERROR is not defined */
1022    {
1023    if (n < 0 || n >= sys_nerr) return "unknown error number";   /* n outside 0..sys_nerr-1: fixed text, sys_errlist is not indexed */
1024    return sys_errlist[n];   /* message comes straight from the system-provided table */
1025    }
1026    #endif /* HAVE_STRERROR */
1027    
1028    
1029    /*************************************************
1030    *         JIT memory callback                    *
1031    *************************************************/
1032    
1033    static pcre_jit_stack* jit_callback(void *arg)   /* presumably registered as the JIT stack callback elsewhere - confirm at the call site */
1034    {
1035    return (pcre_jit_stack *)arg;   /* arg is handed back unchanged, only cast to the JIT stack type */
1036    }
1037    
1038    
1039    #if !defined NOUTF || defined SUPPORT_PCRE16
1040    /*************************************************
1041    *            Convert UTF-8 string to value       *
1042    *************************************************/
1043    
1044    /* This function takes one or more bytes that represents a UTF-8 character,
1045    and returns the value of the character.
1046    
1047    Argument:
1048      utf8bytes   a pointer to the byte vector
1049      vptr        a pointer to an int to receive the value
1050    
1051    Returns:      >  0 => the number of bytes consumed
1052                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1053    */
1054    
1055    static int
1056    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1057    {
1058    int c = *utf8bytes++;                  /* lead byte */
1059    int d = c;                             /* copy that is shifted left to count leading 1 bits */
1060    int i, j, s;
1061    
1062    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1063      {
1064      if ((d & 0x80) == 0) break;          /* reached the first 0 bit of the lead byte */
1065      d <<= 1;
1066      }
1067    
1068    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1069    if (i == 0 || i == 6) return 0;        /* invalid UTF-8: lead is 10xxxxxx (i == 0) or 0xfe/0xff (i == 6) */
1070    
1071    /* i now has a value in the range 1-5 */
1072    
1073    s = 6*i;                               /* shift for the lead byte's payload bits */
1074    d = (c & utf8_table3[i]) << s;         /* utf8_table3 is defined elsewhere; presumably the lead-byte payload mask */
1075    
1076    for (j = 0; j < i; j++)
1077      {
1078      c = *utf8bytes++;
1079      if ((c & 0xc0) != 0x80) return -(j+1);   /* not 10xxxxxx: return the negated offset of this byte */
1080      s -= 6;
1081      d |= (c & 0x3f) << s;                /* merge this byte's 6 payload bits */
1082      }
1083    
1084    /* Check that encoding was the correct unique one */
1085    
1086    for (j = 0; j < utf8_table1_size; j++)
1087      if (d <= utf8_table1[j]) break;
1088    if (j != i) return -(i+1);             /* value falls in a different length class; presumably an overlong form */
1089    
1090    /* Valid value */
1091    
1092    *vptr = d;
1093    return i+1;                            /* lead byte plus i continuation bytes */
1094    }
1095    #endif /* NOUTF || SUPPORT_PCRE16 */
1096    
1097    
1098    
1099    #if !defined NOUTF || defined SUPPORT_PCRE16
1100    /*************************************************
1101    *       Convert character value to UTF-8         *
1102    *************************************************/
1103    
1104    /* This function takes an integer value in the range 0 - 0x7fffffff
1105    and encodes it as a UTF-8 character in 1 to 6 bytes.
1106    
1107    Arguments:
1108      cvalue     the character value
1109      utf8bytes  pointer to buffer for result - at least 6 bytes long
1110    
1111    Returns:     number of bytes placed in the buffer
1112    */
1113    
1114    static int
1115    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1116    {
1117    register int i, j;
1118    for (i = 0; i < utf8_table1_size; i++)   /* find i = number of continuation bytes needed */
1119      if (cvalue <= utf8_table1[i]) break;
1120    utf8bytes += i;                          /* point at the last byte; the buffer is filled back to front */
1121    for (j = i; j > 0; j--)
1122     {
1123     *utf8bytes-- = 0x80 | (cvalue & 0x3f);  /* low 6 bits go into a 10xxxxxx continuation byte */
1124     cvalue >>= 6;
1125     }
1126    *utf8bytes = utf8_table2[i] | cvalue;    /* lead byte: utf8_table2[i] (defined elsewhere, presumably the length marker) plus the remaining high bits */
1127    return i + 1;                            /* lead byte plus i continuation bytes */
1128    }
1129    #endif
1130    
1131    
1132    #ifdef SUPPORT_PCRE16
1133    /*************************************************
1134    *         Convert a string to 16-bit             *
1135    *************************************************/
1136    
1137    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1138    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1139    double, because values up to 0xffff use 1 to 3 bytes in UTF-8 but only 2 bytes
1140    in UTF-16, while higher values use 4 bytes in both UTF-8 and UTF-16. The
1141    result is always left in buffer16.
1142    
1143    Note that this function does not object to surrogate values. This is
1144    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1145    for the purpose of testing that they are correctly faulted.
1146    
1147    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1148    in UTF-8 so that values greater than 255 can be handled.
1149    
1150    Arguments:
1151      data       TRUE if converting a data line; FALSE for a regex
1152      p          points to a byte string
1153      utf        true if UTF-8 (to be converted to UTF-16)
1154      len        number of bytes in the string (excluding trailing zero)
1155    
1156    Returns:     number of 16-bit data items used (excluding trailing zero)
1157                 OR -1 if a UTF-8 string is malformed
1158                 OR -2 if a value > 0x10ffff is encountered
1159                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1160    */
1161    
1162    static int
1163    to16(int data, pcre_uint8 *p, int utf, int len)
1164    {
1165    pcre_uint16 *pp;
1166    
1167    if (buffer16_size < 2*len + 2)   /* size in bytes: len 16-bit units plus a terminating zero unit */
1168      {
1169      if (buffer16 != NULL) free(buffer16);   /* old contents are discarded, not copied */
1170      buffer16_size = 2*len + 2;
1171      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1172      if (buffer16 == NULL)
1173        {
1174        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1175        exit(1);
1176        }
1177      }
1178    
1179    pp = buffer16;
1180    
1181    if (!utf && !data)   /* non-UTF pattern: each byte becomes one 16-bit unit */
1182      {
1183      while (len-- > 0) *pp++ = *p++;
1184      }
1185    
1186    else   /* UTF pattern, or any data line: decode the input as UTF-8 */
1187      {
1188      int c = 0;
1189      while (len > 0)
1190        {
1191        int chlen = utf82ord(p, &c);
1192        if (chlen <= 0) return -1;     /* malformed UTF-8 */
1193        if (c > 0x10ffff) return -2;   /* value too large to encode in UTF-16 */
1194        p += chlen;
1195        len -= chlen;
1196        if (c < 0x10000) *pp++ = c; else   /* value fits in a single 16-bit unit */
1197          {
1198          if (!utf) return -3;             /* needs a surrogate pair, which is only produced in UTF mode */
1199          c -= 0x10000;
1200          *pp++ = 0xD800 | (c >> 10);      /* high surrogate: top 10 bits */
1201          *pp++ = 0xDC00 | (c & 0x3ff);    /* low surrogate: bottom 10 bits */
1202          }
1203        }
1204      }
1205    
1206    *pp = 0;                /* terminating zero unit */
1207    return pp - buffer16;   /* count of units written, excluding the terminator */
1208    }
1209    #endif
1210    
1211    
1212  /*************************************************  /*************************************************
# Line 213  Returns:       pointer to the start of n Line 1232  Returns:       pointer to the start of n
1232                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1233  */  */
1234    
1235  static uschar *  static pcre_uint8 *
1236  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1237  {  {
1238  uschar *here = start;  pcre_uint8 *here = start;
1239    
1240  for (;;)  for (;;)
1241    {    {
1242    int rlen = buffer_size - (here - buffer);    int rlen = (int)(buffer_size - (here - buffer));
1243    
1244    if (rlen > 1000)    if (rlen > 1000)
1245      {      {
# Line 250  for (;;) Line 1269  for (;;)
1269      /* Read the next line by normal means, prompting if the file is stdin. */      /* Read the next line by normal means, prompting if the file is stdin. */
1270    
1271        {        {
1272        if (f == stdin) printf(prompt);        if (f == stdin) printf("%s", prompt);
1273        if (fgets((char *)here, rlen,  f) == NULL)        if (fgets((char *)here, rlen,  f) == NULL)
1274          return (here == start)? NULL : start;          return (here == start)? NULL : start;
1275        }        }
# Line 263  for (;;) Line 1282  for (;;)
1282    else    else
1283      {      {
1284      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1285      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1286      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1287      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1288    
1289      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1290        {        {
# Line 296  return NULL;  /* Control never gets here Line 1315  return NULL;  /* Control never gets here
1315    
1316    
1317    
   
   
   
   
1318  /*************************************************  /*************************************************
1319  *          Read number from string               *  *          Read number from string               *
1320  *************************************************/  *************************************************/
# Line 316  Returns:        the unsigned long Line 1331  Returns:        the unsigned long
1331  */  */
1332    
1333  static int  static int
1334  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1335  {  {
1336  int result = 0;  int result = 0;
1337  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 327  return(result); Line 1342  return(result);
1342    
1343    
1344    
   
1345  /*************************************************  /*************************************************
1346  *            Convert UTF-8 string to value       *  *             Print one character                *
1347  *************************************************/  *************************************************/
1348    
1349  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
1350    
1351  static int  static int pchar(int c, FILE *f)
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1352  {  {
1353  int c = *utf8bytes++;  if (PRINTOK(c))
1354  int d = c;    {
1355  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1356      return 1;
1357      }
1358    
1359  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1360    {    {
1361    if ((d & 0x80) == 0) break;    if (use_utf)
1362    d <<= 1;      {
1363        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1364        return 6;
1365        }
1366      else
1367        {
1368        if (f != NULL) fprintf(f, "\\x%02x", c);
1369        return 4;
1370        }
1371    }    }
1372    
1373  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1374  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1375           (c <= 0x00000fff)? 7 :
1376           (c <= 0x0000ffff)? 8 :
1377           (c <= 0x000fffff)? 9 : 10;
1378    }
1379    
 /* i now has a value in the range 1-5 */  
1380    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1381    
1382  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1383    {  /*************************************************
1384    c = *utf8bytes++;  *         Print 8-bit character string           *
1385    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1386    
1387  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1388    If handed a NULL file, just counts chars without printing. */
1389    
1390  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1391    if (d <= utf8_table1[j]) break;  {
1392  if (j != i) return -(i+1);  int c = 0;
1393    int yield = 0;
1394    
1395  /* Valid value */  if (length < 0)
1396      length = strlen((char *)p);
1397    
1398  *vptr = d;  while (length-- > 0)
1399  return i+1;    {
1400  }  #if !defined NOUTF
1401      if (use_utf)
1402        {
1403        int rc = utf82ord(p, &c);
1404        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1405          {
1406          length -= rc - 1;
1407          p += rc;
1408          yield += pchar(c, f);
1409          continue;
1410          }
1411        }
1412    #endif
1413      c = *p++;
1414      yield += pchar(c, f);
1415      }
1416    
1417    return yield;
1418    }
1419  #endif  #endif
1420    
1421    
1422    
1423    #ifdef SUPPORT_PCRE16
1424  /*************************************************  /*************************************************
1425  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
1426  *************************************************/  *************************************************/
1427    
1428  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
1429  {  {
1430  register int i, j;  int len = 0;
1431  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
1432    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1433  }  }
1434    #endif  /* SUPPORT_PCRE16 */
 #endif  
   
1435    
1436    
1437    #ifdef SUPPORT_PCRE16
1438  /*************************************************  /*************************************************
1439  *             Print character string             *  *           Print 16-bit character string        *
1440  *************************************************/  *************************************************/
1441    
1442  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1443  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1444    
1445  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1446  {  {
 int c = 0;  
1447  int yield = 0;  int yield = 0;
1448    
1449    if (length < 0)
1450      length = strlen16(p);
1451    
1452  while (length-- > 0)  while (length-- > 0)
1453    {    {
1454  #if !defined NOUTF8    int c = *p++ & 0xffff;
1455    if (use_utf8)  #if !defined NOUTF
1456      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1457      {      {
1458      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
1459        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1460        {        {
1461        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1462        p += rc;        length--;
1463        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1464        }        }
1465      }      }
1466  #endif  #endif
1467      yield += pchar(c, f);
1468      }
1469    
1470     /* Not UTF-8, or malformed UTF-8  */  return yield;
1471    }
1472    #endif  /* SUPPORT_PCRE16 */
1473    
1474    c = *p++;  
1475    if (PRINTHEX(c))  
1476      {  #ifdef SUPPORT_PCRE8
1477      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1478      yield++;  *     Read a capture name (8-bit) and check it   *
1479      }  *************************************************/
1480    else  
1481      {  static pcre_uint8 *
1482      if (f != NULL) fprintf(f, "\\x%02x", c);  read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1483      yield += 4;  {
1484      }  pcre_uint8 *npp = *pp;
1485    while (isalnum(*p)) *npp++ = *p++;
1486    *npp++ = 0;
1487    *npp = 0;
1488    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1489      {
1490      fprintf(outfile, "no parentheses with name \"");
1491      PCHARSV(*pp, 0, -1, outfile);
1492      fprintf(outfile, "\"\n");
1493    }    }
1494    
1495  return yield;  *pp = npp;
1496    return p;
1497    }
1498    #endif  /* SUPPORT_PCRE8 */
1499    
1500    
1501    
1502    #ifdef SUPPORT_PCRE16
1503    /*************************************************
1504    *     Read a capture name (16-bit) and check it  *
1505    *************************************************/
1506    
1507    /* Note that the text being read is 8-bit. */
1508    
1509    static pcre_uint8 *
1510    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1511    {
1512    pcre_uint16 *npp = *pp;
1513    while (isalnum(*p)) *npp++ = *p++;
1514    *npp++ = 0;
1515    *npp = 0;
1516    if (pcre16_get_stringnumber(re, (PCRE_SPTR16)(*pp)) < 0)
1517      {
1518      fprintf(outfile, "no parentheses with name \"");
1519      PCHARSV(*pp, 0, -1, outfile);
1520      fprintf(outfile, "\"\n");
1521      }
1522    *pp = npp;
1523    return p;
1524  }  }
1525    #endif  /* SUPPORT_PCRE16 */
1526    
1527    
1528    
# Line 514  if (callout_extra) Line 1551  if (callout_extra)
1551      else      else
1552        {        {
1553        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1554        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1555          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1556        fprintf(f, "\n");        fprintf(f, "\n");
1557        }        }
# Line 527  printed lengths of the substrings. */ Line 1564  printed lengths of the substrings. */
1564    
1565  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1566    
1567  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1568  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1569    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1570    
1571  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1572    
1573  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1574    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1575    
1576  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 570  fprintf(outfile, "%.*s", (cb->next_item_ Line 1607  fprintf(outfile, "%.*s", (cb->next_item_
1607  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1608  first_callout = 0;  first_callout = 0;
1609    
1610    if (cb->mark != last_callout_mark)
1611      {
1612      if (cb->mark == NULL)
1613        fprintf(outfile, "Latest Mark: <unset>\n");
1614      else
1615        {
1616        fprintf(outfile, "Latest Mark: ");
1617        PCHARSV(cb->mark, 0, -1, outfile);
1618        putc('\n', outfile);
1619        }
1620      last_callout_mark = cb->mark;
1621      }
1622    
1623  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1624    {    {
1625    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 589  return (cb->callout_number != callout_fa Line 1639  return (cb->callout_number != callout_fa
1639  *            Local malloc functions              *  *            Local malloc functions              *
1640  *************************************************/  *************************************************/
1641    
1642  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1643  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1644    show_malloc variable is set only during matching. */
1645    
1646  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1647  {  {
1648  void *block = malloc(size);  void *block = malloc(size);
1649  gotten_store = size;  gotten_store = size;
1650    if (first_gotten_store == 0) first_gotten_store = size;
1651  if (show_malloc)  if (show_malloc)
1652    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1653  return block;  return block;
# Line 608  if (show_malloc) Line 1660  if (show_malloc)
1660  free(block);  free(block);
1661  }  }
1662    
   
1663  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1664    
1665  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 631  free(block); Line 1682  free(block);
1682  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1683  *************************************************/  *************************************************/
1684    
1685  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1686    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1687    value, but the code is defensive.
1688    
1689  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  Arguments:
1690      re        compiled regex
1691      study     study data
1692      option    PCRE_INFO_xxx option
1693      ptr       where to put the data
1694    
1695    Returns:    0 when OK, < 0 on error
1696    */
1697    
1698    static int
1699    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1700  {  {
1701  int rc;  int rc;
1702  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1703    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1704    #ifdef SUPPORT_PCRE16
1705      rc = pcre16_fullinfo(re, study, option, ptr);
1706    #else
1707      rc = PCRE_ERROR_BADMODE;
1708    #endif
1709    else
1710    #ifdef SUPPORT_PCRE8
1711      rc = pcre_fullinfo(re, study, option, ptr);
1712    #else
1713      rc = PCRE_ERROR_BADMODE;
1714    #endif
1715    
1716    if (rc < 0)
1717      {
1718      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1719        use_pcre16? "16" : "", option);
1720      if (rc == PCRE_ERROR_BADMODE)
1721        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1722          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1723      }
1724    
1725    return rc;
1726  }  }
1727    
1728    
1729    
1730  /*************************************************  /*************************************************
1731  *         Byte flipping function                 *  *             Swap byte functions                *
1732  *************************************************/  *************************************************/
1733    
1734  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1735  byteflip(unsigned long int value, int n)  value, respectively.
1736    
1737    Arguments:
1738      value        any number
1739    
1740    Returns:       the byte swapped value
1741    */
1742    
1743    static pcre_uint32
1744    swap_uint32(pcre_uint32 value)
1745  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1746  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1747         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1748         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1749         ((value & 0xff000000) >> 24);         (value >> 24);
1750    }
1751    
1752    static pcre_uint16
1753    swap_uint16(pcre_uint16 value)
1754    {
1755    return (value >> 8) | (value << 8);
1756  }  }
1757    
1758    
1759    
1760    /*************************************************
1761    *        Flip bytes in a compiled pattern        *
1762    *************************************************/
1763    
1764    /* This function is called if the 'F' option was present on a pattern that is
1765    to be written to a file. We flip the bytes of all the integer fields in the
1766    regex data block and the study block. In 16-bit mode this also flips relevant
1767    bytes in the pattern itself. This is to make it possible to test PCRE's
1768    ability to reload byte-flipped patterns, e.g. those compiled on a different
1769    architecture. */
1770    
1771    static void
1772    regexflip(pcre *ere, pcre_extra *extra)
1773    {
1774    real_pcre *re = (real_pcre *)ere;
1775    #ifdef SUPPORT_PCRE16
1776    int op;
1777    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1778    int length = re->name_count * re->name_entry_size;
1779    #ifdef SUPPORT_UTF
1780    BOOL utf = (re->options & PCRE_UTF16) != 0;
1781    BOOL utf16_char = FALSE;
1782    #endif /* SUPPORT_UTF */
1783    #endif /* SUPPORT_PCRE16 */
1784    
1785    /* Always flip the bytes in the main data block and study blocks. */
1786    
1787    re->magic_number = REVERSED_MAGIC_NUMBER;
1788    re->size = swap_uint32(re->size);
1789    re->options = swap_uint32(re->options);
1790    re->flags = swap_uint16(re->flags);
1791    re->top_bracket = swap_uint16(re->top_bracket);
1792    re->top_backref = swap_uint16(re->top_backref);
1793    re->first_char = swap_uint16(re->first_char);
1794    re->req_char = swap_uint16(re->req_char);
1795    re->name_table_offset = swap_uint16(re->name_table_offset);
1796    re->name_entry_size = swap_uint16(re->name_entry_size);
1797    re->name_count = swap_uint16(re->name_count);
1798    
1799    if (extra != NULL)
1800      {
1801      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1802      rsd->size = swap_uint32(rsd->size);
1803      rsd->flags = swap_uint32(rsd->flags);
1804      rsd->minlength = swap_uint32(rsd->minlength);
1805      }
1806    
1807    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1808    in the name table, if present, and then in the pattern itself. */
1809    
1810    #ifdef SUPPORT_PCRE16
1811    if (!use_pcre16) return;
1812    
1813    while(TRUE)
1814      {
1815      /* Swap previous characters. */
1816      while (length-- > 0)
1817        {
1818        *ptr = swap_uint16(*ptr);
1819        ptr++;
1820        }
1821    #ifdef SUPPORT_UTF
1822      if (utf16_char)
1823        {
1824        if ((ptr[-1] & 0xfc00) == 0xd800)
1825          {
1826          /* We know that there is only one extra character in UTF-16. */
1827          *ptr = swap_uint16(*ptr);
1828          ptr++;
1829          }
1830        }
1831      utf16_char = FALSE;
1832    #endif /* SUPPORT_UTF */
1833    
1834      /* Get next opcode. */
1835    
1836      length = 0;
1837      op = *ptr;
1838      *ptr++ = swap_uint16(op);
1839    
1840      switch (op)
1841        {
1842        case OP_END:
1843        return;
1844    
1845    #ifdef SUPPORT_UTF
1846        case OP_CHAR:
1847        case OP_CHARI:
1848        case OP_NOT:
1849        case OP_NOTI:
1850        case OP_STAR:
1851        case OP_MINSTAR:
1852        case OP_PLUS:
1853        case OP_MINPLUS:
1854        case OP_QUERY:
1855        case OP_MINQUERY:
1856        case OP_UPTO:
1857        case OP_MINUPTO:
1858        case OP_EXACT:
1859        case OP_POSSTAR:
1860        case OP_POSPLUS:
1861        case OP_POSQUERY:
1862        case OP_POSUPTO:
1863        case OP_STARI:
1864        case OP_MINSTARI:
1865        case OP_PLUSI:
1866        case OP_MINPLUSI:
1867        case OP_QUERYI:
1868        case OP_MINQUERYI:
1869        case OP_UPTOI:
1870        case OP_MINUPTOI:
1871        case OP_EXACTI:
1872        case OP_POSSTARI:
1873        case OP_POSPLUSI:
1874        case OP_POSQUERYI:
1875        case OP_POSUPTOI:
1876        case OP_NOTSTAR:
1877        case OP_NOTMINSTAR:
1878        case OP_NOTPLUS:
1879        case OP_NOTMINPLUS:
1880        case OP_NOTQUERY:
1881        case OP_NOTMINQUERY:
1882        case OP_NOTUPTO:
1883        case OP_NOTMINUPTO:
1884        case OP_NOTEXACT:
1885        case OP_NOTPOSSTAR:
1886        case OP_NOTPOSPLUS:
1887        case OP_NOTPOSQUERY:
1888        case OP_NOTPOSUPTO:
1889        case OP_NOTSTARI:
1890        case OP_NOTMINSTARI:
1891        case OP_NOTPLUSI:
1892        case OP_NOTMINPLUSI:
1893        case OP_NOTQUERYI:
1894        case OP_NOTMINQUERYI:
1895        case OP_NOTUPTOI:
1896        case OP_NOTMINUPTOI:
1897        case OP_NOTEXACTI:
1898        case OP_NOTPOSSTARI:
1899        case OP_NOTPOSPLUSI:
1900        case OP_NOTPOSQUERYI:
1901        case OP_NOTPOSUPTOI:
1902        if (utf) utf16_char = TRUE;
1903    #endif
1904        /* Fall through. */
1905    
1906        default:
1907        length = OP_lengths16[op] - 1;
1908        break;
1909    
1910        case OP_CLASS:
1911        case OP_NCLASS:
1912        /* Skip the character bit map. */
1913        ptr += 32/sizeof(pcre_uint16);
1914        length = 0;
1915        break;
1916    
1917        case OP_XCLASS:
1918        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1919        if (LINK_SIZE > 1)
1920          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1921            - (1 + LINK_SIZE + 1));
1922        else
1923          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1924    
1925        /* Reverse the size of the XCLASS instance. */
1926        *ptr = swap_uint16(*ptr);
1927        ptr++;
1928        if (LINK_SIZE > 1)
1929          {
1930          *ptr = swap_uint16(*ptr);
1931          ptr++;
1932          }
1933    
1934        op = *ptr;
1935        *ptr = swap_uint16(op);
1936        ptr++;
1937        if ((op & XCL_MAP) != 0)
1938          {
1939          /* Skip the character bit map. */
1940          ptr += 32/sizeof(pcre_uint16);
1941          length -= 32/sizeof(pcre_uint16);
1942          }
1943        break;
1944        }
1945      }
1946    /* Control should never reach here in 16 bit mode. */
1947    #endif /* SUPPORT_PCRE16 */
1948    }
1949    
1950    
1951    
1952  /*************************************************  /*************************************************
1953  *        Check match or recursion limit          *  *        Check match or recursion limit          *
1954  *************************************************/  *************************************************/
1955    
1956  static int  static int
1957  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1958    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1959    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1960  {  {
# Line 679  for (;;) Line 1969  for (;;)
1969    {    {
1970    *limit = mid;    *limit = mid;
1971    
1972    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1973      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
1974    
1975    if (count == errnumber)    if (count == errnumber)
# Line 724  Returns:    < 0, = 0, or > 0, according Line 2014  Returns:    < 0, = 0, or > 0, according
2014  */  */
2015    
2016  static int  static int
2017  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2018  {  {
2019  while (n--)  while (n--)
2020    {    {
# Line 740  return 0; Line 2030  return 0;
2030  *         Check newline indicator                *  *         Check newline indicator                *
2031  *************************************************/  *************************************************/
2032    
2033  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2034  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
2035    
2036  Arguments:  Arguments:
2037    p           points after the leading '<'    p           points after the leading '<'
# Line 752  Returns:      appropriate PCRE_NEWLINE_x Line 2041  Returns:      appropriate PCRE_NEWLINE_x
2041  */  */
2042    
2043  static int  static int
2044  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2045  {  {
2046  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2047  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2048  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2049  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2050  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2051  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2052  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2053  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2054  return 0;  return 0;
2055  }  }
# Line 782  printf("If input is a terminal, readline Line 2071  printf("If input is a terminal, readline
2071  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2072  #endif  #endif
2073  printf("\nOptions:\n");  printf("\nOptions:\n");
2074    #ifdef SUPPORT_PCRE16
2075    printf("  -16      use 16-bit interface\n");
2076    #endif
2077  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
2078  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2079    printf("  -C arg   show a specific compile-time option\n");
2080    printf("           and exit with its value. The arg can be:\n");
2081    printf("     linksize     internal link size [2, 3, 4]\n");
2082    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2083    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2084    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2085    printf("     ucp          Unicode Properties supported [0, 1]\n");
2086    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2087    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2088  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2089  #if !defined NODFA  #if !defined NODFA
2090  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 798  printf("  -p       use POSIX interface\n Line 2099  printf("  -p       use POSIX interface\n
2099  #endif  #endif
2100  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2101  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2102  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2103           "  -s+      force each pattern to be studied, using JIT if available\n"
2104         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2105  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2106  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 818  options, followed by a set of test data, Line 2120  options, followed by a set of test data,
2120  int main(int argc, char **argv)  int main(int argc, char **argv)
2121  {  {
2122  FILE *infile = stdin;  FILE *infile = stdin;
2123    const char *version;
2124  int options = 0;  int options = 0;
2125  int study_options = 0;  int study_options = 0;
2126  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 826  int timeit = 0; Line 2129  int timeit = 0;
2129  int timeitm = 0;  int timeitm = 0;
2130  int showinfo = 0;  int showinfo = 0;
2131  int showstore = 0;  int showstore = 0;
2132    int force_study = -1;
2133    int force_study_options = 0;
2134  int quiet = 0;  int quiet = 0;
2135  int size_offsets = 45;  int size_offsets = 45;
2136  int size_offsets_max;  int size_offsets_max;
# Line 839  int all_use_dfa = 0; Line 2144  int all_use_dfa = 0;
2144  int yield = 0;  int yield = 0;
2145  int stack_size;  int stack_size;
2146    
2147  /* These vectors store, end-to-end, a list of captured substring names. Assume  pcre_jit_stack *jit_stack = NULL;
 that 1024 is plenty long enough for the few names we'll be testing. */  
   
 uschar copynames[1024];  
 uschar getnames[1024];  
2148    
2149  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2150  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2151    that 1024 is plenty long enough for the few names we'll be testing. It is
2152  /* Get buffers from malloc() so that Electric Fence will check their misuse  easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2153  when I am debugging. They grow automatically when very long lines are read. */  for the actual memory, to ensure alignment. By defining these variables always
2154    (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2155  buffer = (unsigned char *)malloc(buffer_size);  #ifdefs in the code. */
2156  dbuffer = (unsigned char *)malloc(buffer_size);  
2157  pbuffer = (unsigned char *)malloc(buffer_size);  pcre_uint16 copynames[1024];
2158    pcre_uint16 getnames[1024];
2159    
2160    pcre_uint16 *cn16ptr;
2161    pcre_uint16 *gn16ptr;
2162    
2163    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2164    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2165    pcre_uint8 *cn8ptr;
2166    pcre_uint8 *gn8ptr;
2167    
2168    /* Get buffers from malloc() so that valgrind will check their misuse when
2169    debugging. They grow automatically when very long lines are read. The 16-bit
2170    buffer (buffer16) is obtained only if needed. */
2171    
2172    buffer = (pcre_uint8 *)malloc(buffer_size);
2173    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2174    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2175    
2176  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2177    
# Line 868  it set 0x8000, but then I was advised th Line 2186  it set 0x8000, but then I was advised th
2186  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2187  #endif  #endif
2188    
2189    /* Get the version number: both pcre_version() and pcre16_version() give the
2190    same answer. We just need to ensure that we call one that is available. */
2191    
2192    #ifdef SUPPORT_PCRE8
2193    version = pcre_version();
2194    #else
2195    version = pcre16_version();
2196    #endif
2197    
2198  /* Scan options */  /* Scan options */
2199    
2200  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2201    {    {
2202    unsigned char *endptr;    pcre_uint8 *endptr;
2203    
2204    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-m") == 0) showstore = 1;
2205      showstore = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2206      else if (strcmp(argv[op], "-s+") == 0)
2207        {
2208        force_study = 1;
2209        force_study_options = PCRE_STUDY_JIT_COMPILE;
2210        }
2211      else if (strcmp(argv[op], "-16") == 0)
2212        {
2213    #ifdef SUPPORT_PCRE16
2214        use_pcre16 = 1;
2215    #else
2216        printf("** This version of PCRE was built without 16-bit support\n");
2217        exit(1);
2218    #endif
2219        }
2220    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2221    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
2222    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
# Line 885  while (argc > 1 && argv[op][0] == '-') Line 2226  while (argc > 1 && argv[op][0] == '-')
2226    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2227  #endif  #endif
2228    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2229        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2230          *endptr == 0))          *endptr == 0))
2231      {      {
2232      op++;      op++;
# Line 895  while (argc > 1 && argv[op][0] == '-') Line 2236  while (argc > 1 && argv[op][0] == '-')
2236      {      {
2237      int both = argv[op][2] == 0;      int both = argv[op][2] == 0;
2238      int temp;      int temp;
2239      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2240                       *endptr == 0))                       *endptr == 0))
2241        {        {
2242        timeitm = temp;        timeitm = temp;
# Line 906  while (argc > 1 && argv[op][0] == '-') Line 2247  while (argc > 1 && argv[op][0] == '-')
2247      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2248      }      }
2249    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2250        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2251          *endptr == 0))          *endptr == 0))
2252      {      {
2253  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2254      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2255      exit(1);      exit(1);
2256  #else  #else
# Line 934  while (argc > 1 && argv[op][0] == '-') Line 2275  while (argc > 1 && argv[op][0] == '-')
2275      {      {
2276      int rc;      int rc;
2277      unsigned long int lrc;      unsigned long int lrc;
2278      printf("PCRE version %s\n", pcre_version());  
2279        if (argc > 2)
2280          {
2281          if (strcmp(argv[op + 1], "linksize") == 0)
2282            {
2283            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2284            printf("%d\n", rc);
2285            yield = rc;
2286            goto EXIT;
2287            }
2288          if (strcmp(argv[op + 1], "pcre8") == 0)
2289            {
2290    #ifdef SUPPORT_PCRE8
2291            printf("1\n");
2292            yield = 1;
2293    #else
2294            printf("0\n");
2295            yield = 0;
2296    #endif
2297            goto EXIT;
2298            }
2299          if (strcmp(argv[op + 1], "pcre16") == 0)
2300            {
2301    #ifdef SUPPORT_PCRE16
2302            printf("1\n");
2303            yield = 1;
2304    #else
2305            printf("0\n");
2306            yield = 0;
2307    #endif
2308            goto EXIT;
2309            }
2310          if (strcmp(argv[op + 1], "utf") == 0)
2311            {
2312    #ifdef SUPPORT_PCRE8
2313            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2314            printf("%d\n", rc);
2315            yield = rc;
2316    #else
2317            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2318            printf("%d\n", rc);
2319            yield = rc;
2320    #endif
2321            goto EXIT;
2322            }
2323          if (strcmp(argv[op + 1], "ucp") == 0)
2324            {
2325            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2326            printf("%d\n", rc);
2327            yield = rc;
2328            goto EXIT;
2329            }
2330          if (strcmp(argv[op + 1], "jit") == 0)
2331            {
2332            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2333            printf("%d\n", rc);
2334            yield = rc;
2335            goto EXIT;
2336            }
2337          if (strcmp(argv[op + 1], "newline") == 0)
2338            {
2339            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2340            /* Note that these values are always the ASCII values, even
2341            in EBCDIC environments. CR is 13 and NL is 10. */
2342            printf("%s\n", (rc == 13)? "CR" :
2343              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2344              (rc == -2)? "ANYCRLF" :
2345              (rc == -1)? "ANY" : "???");
2346            goto EXIT;
2347            }
2348          printf("Unknown -C option: %s\n", argv[op + 1]);
2349          goto EXIT;
2350          }
2351    
2352        printf("PCRE version %s\n", version);
2353      printf("Compiled with\n");      printf("Compiled with\n");
2354    
2355    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2356    are set, either both UTFs are supported or both are not supported. */
2357    
2358    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2359        printf("  8-bit and 16-bit support\n");
2360        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2361        if (rc)
2362          printf("  UTF-8 and UTF-16 support\n");
2363        else
2364          printf("  No UTF-8 or UTF-16 support\n");
2365    #elif defined SUPPORT_PCRE8
2366        printf("  8-bit support only\n");
2367      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2368      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2369      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2370        printf("  16-bit support only\n");
2371        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2372        printf("  %sUTF-16 support\n", rc? "" : "No ");
2373    #endif
2374    
2375        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2376      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2377      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2378        if (rc)
2379          printf("  Just-in-time compiler support\n");
2380        else
2381          printf("  No just-in-time compiler support\n");
2382        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2383      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
2384      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
2385      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2386        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2387        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2388        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2389      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2390      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2391                                       "all Unicode newlines");                                       "all Unicode newlines");
2392      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2393      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2394      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2395      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2396      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2397      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
2398      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2399      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
2400      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2401      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
2402      goto EXIT;      goto EXIT;
2403      }      }
# Line 1017  if (argc > 2) Line 2456  if (argc > 2)
2456    
2457  /* Set alternative malloc function */  /* Set alternative malloc function */
2458    
2459    #ifdef SUPPORT_PCRE8
2460  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2461  pcre_free = new_free;  pcre_free = new_free;
2462  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2463  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2464    #endif
2465    
2466    #ifdef SUPPORT_PCRE16
2467    pcre16_malloc = new_malloc;
2468    pcre16_free = new_free;
2469    pcre16_stack_malloc = stack_malloc;
2470    pcre16_stack_free = stack_free;
2471    #endif
2472    
2473  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2474    
2475  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2476    
2477  /* Main loop */  /* Main loop */
2478    
# Line 1039  while (!done) Line 2487  while (!done)
2487  #endif  #endif
2488    
2489    const char *error;    const char *error;
2490    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2491    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
2492    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
2493      const pcre_uint8 *tables = NULL;
2494      pcre_uint32 get_options;
2495    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2496    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2497      int do_allcaps = 0;
2498      int do_mark = 0;
2499    int do_study = 0;    int do_study = 0;
2500      int no_force_study = 0;
2501    int do_debug = debug;    int do_debug = debug;
2502    int do_G = 0;    int do_G = 0;
2503    int do_g = 0;    int do_g = 0;
2504    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2505    int do_showrest = 0;    int do_showrest = 0;
2506      int do_showcaprest = 0;
2507    int do_flip = 0;    int do_flip = 0;
2508    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2509    
2510    use_utf8 = 0;    use_utf = 0;
2511    debug_lengths = 1;    debug_lengths = 1;
2512    
2513    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1068  while (!done) Line 2522  while (!done)
2522    
2523    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2524      {      {
2525      unsigned long int magic, get_options;      pcre_uint32 magic;
2526      uschar sbuf[8];      pcre_uint8 sbuf[8];
2527      FILE *f;      FILE *f;
2528    
2529      p++;      p++;
2530        if (*p == '!')
2531          {
2532          do_debug = TRUE;
2533          do_showinfo = TRUE;
2534          p++;
2535          }
2536    
2537      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2538      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2539      *pp = 0;      *pp = 0;
# Line 1084  while (!done) Line 2545  while (!done)
2545        continue;        continue;
2546        }        }
2547    
2548        first_gotten_store = 0;
2549      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2550    
2551      true_size =      true_size =
# Line 1092  while (!done) Line 2554  while (!done)
2554        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2555    
2556      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
2557      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2558    
2559      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2560    
2561      magic = ((real_pcre *)re)->magic_number;      magic = ((real_pcre *)re)->magic_number;
2562      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2563        {        {
2564        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2565          {          {
2566          do_flip = 1;          do_flip = 1;
2567          }          }
# Line 1111  while (!done) Line 2573  while (!done)
2573          }          }
2574        }        }
2575    
2576      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2577        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2578          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
2579    
2580      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      /* Now see if there is any following study data. */
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
2581    
2582      if (true_study_size != 0)      if (true_study_size != 0)
2583        {        {
# Line 1135  while (!done) Line 2593  while (!done)
2593          {          {
2594          FAIL_READ:          FAIL_READ:
2595          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2596          if (extra != NULL) new_free(extra);          if (extra != NULL)
2597              {
2598              PCRE_FREE_STUDY(extra);
2599              }
2600          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2601          fclose(f);          fclose(f);
2602          continue;          continue;
# Line 1145  while (!done) Line 2606  while (!done)
2606        }        }
2607      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2608    
2609        /* Flip the necessary bytes. */
2610        if (do_flip)
2611          {
2612          int rc;
2613          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2614          if (rc == PCRE_ERROR_BADMODE)
2615            {
2616            /* Simulate the result of the function call below. */
2617            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2618              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2619            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2620              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2621            continue;
2622            }
2623          }
2624    
2625        /* Need to know if UTF-8 for printing data strings. */
2626    
2627        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2628        use_utf = (get_options & PCRE_UTF8) != 0;
2629    
2630      fclose(f);      fclose(f);
2631      goto SHOW_INFO;      goto SHOW_INFO;
2632      }      }
2633    
2634    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2635    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2636    
2637    delimiter = *p++;    delimiter = *p++;
2638    
# Line 1161  while (!done) Line 2643  while (!done)
2643      }      }
2644    
2645    pp = p;    pp = p;
2646    poffset = p - buffer;    poffset = (int)(p - buffer);
2647    
2648    for(;;)    for(;;)
2649      {      {
# Line 1215  while (!done) Line 2697  while (!done)
2697        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2698        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2699    
2700        case '+': do_showrest = 1; break;        case '+':
2701          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2702          break;
2703    
2704          case '=': do_allcaps = 1; break;
2705        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2706        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2707        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1225  while (!done) Line 2711  while (!done)
2711        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2712        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2713        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
2714          case 'K': do_mark = 1; break;
2715        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2716        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2717    
# Line 1232  while (!done) Line 2719  while (!done)
2719        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2720  #endif  #endif
2721    
2722        case 'S': do_study = 1; break;        case 'S':
2723          if (do_study == 0)
2724            {
2725            do_study = 1;
2726            if (*pp == '+')
2727              {
2728              study_options |= PCRE_STUDY_JIT_COMPILE;
2729              pp++;
2730              }
2731            }
2732          else
2733            {
2734            do_study = 0;
2735            no_force_study = 1;
2736            }
2737          break;
2738    
2739        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2740          case 'W': options |= PCRE_UCP; break;
2741        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2742          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2743        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2744        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2745        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2746    
2747          case 'T':
2748          switch (*pp++)
2749            {
2750            case '0': tables = tables0; break;
2751            case '1': tables = tables1; break;
2752    
2753            case '\r':
2754            case '\n':
2755            case ' ':
2756            case 0:
2757            fprintf(outfile, "** Missing table number after /T\n");
2758            goto SKIP_DATA;
2759    
2760            default:
2761            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2762            goto SKIP_DATA;
2763            }
2764          break;
2765    
2766        case 'L':        case 'L':
2767        ppp = pp;        ppp = pp;
2768        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1251  while (!done) Line 2775  while (!done)
2775          goto SKIP_DATA;          goto SKIP_DATA;
2776          }          }
2777        locale_set = 1;        locale_set = 1;
2778        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2779        pp = ppp;        pp = ppp;
2780        break;        break;
2781    
# Line 1264  while (!done) Line 2788  while (!done)
2788    
2789        case '<':        case '<':
2790          {          {
2791          if (strncmp((char *)pp, "JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2792            {            {
2793            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2794            pp += 3;            pp += 3;
# Line 1292  while (!done) Line 2816  while (!done)
2816    
2817    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2818    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2819    local character tables. */    local character tables. Neither does it have 16-bit support. */
2820    
2821  #if !defined NOPOSIX  #if !defined NOPOSIX
2822    if (posix || do_posix)    if (posix || do_posix)
# Line 1305  while (!done) Line 2829  while (!done)
2829      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2830      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2831      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2832        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2833        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2834    
2835        first_gotten_store = 0;
2836      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2837    
2838      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1325  while (!done) Line 2852  while (!done)
2852  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2853    
2854      {      {
2855        /* In 16-bit mode, convert the input. */
2856    
2857    #ifdef SUPPORT_PCRE16
2858        if (use_pcre16)
2859          {
2860          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2861            {
2862            case -1:
2863            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2864              "converted to UTF-16\n");
2865            goto SKIP_DATA;
2866    
2867            case -2:
2868            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2869              "cannot be converted to UTF-16\n");
2870            goto SKIP_DATA;
2871    
2872            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2873            fprintf(outfile, "**Failed: character value greater than 0xffff "
2874              "cannot be converted to 16-bit in non-UTF mode\n");
2875            goto SKIP_DATA;
2876    
2877            default:
2878            break;
2879            }
2880          p = (pcre_uint8 *)buffer16;
2881          }
2882    #endif
2883    
2884        /* Compile many times when timing */
2885    
2886      if (timeit > 0)      if (timeit > 0)
2887        {        {
2888        register int i;        register int i;
# Line 1332  while (!done) Line 2890  while (!done)
2890        clock_t start_time = clock();        clock_t start_time = clock();
2891        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2892          {          {
2893          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2894          if (re != NULL) free(re);          if (re != NULL) free(re);
2895          }          }
2896        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1341  while (!done) Line 2899  while (!done)
2899            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2900        }        }
2901    
2902      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2903        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2904    
2905      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2906      if non-interactive. */      if non-interactive. */
# Line 1368  while (!done) Line 2927  while (!done)
2927        goto CONTINUE;        goto CONTINUE;
2928        }        }
2929    
2930      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2931      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
2932      returns only limited data. Check that it agrees with the newer one. */      lines. */
2933    
2934      if (log_store)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2935        fprintf(outfile, "Memory allocation (code space): %d\n",        goto SKIP_DATA;
2936          (int)(gotten_store -      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
2937    
2938      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
2939      and remember the store that was got. */      and remember the store that was got. */
2940    
2941      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
2942      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2943    
2944        /* Output code size information if requested */
2945    
2946        if (log_store)
2947          fprintf(outfile, "Memory allocation (code space): %d\n",
2948            (int)(first_gotten_store -
2949                  sizeof(real_pcre) -
2950                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2951    
2952      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
2953      help with the matching. */      help with the matching, unless the pattern has the SS option, which
2954        suppresses the effect of /S (used for a few test patterns where studying is
2955        never sensible). */
2956    
2957      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
2958        {        {
2959        if (timeit > 0)        if (timeit > 0)
2960          {          {
# Line 1395  while (!done) Line 2962  while (!done)
2962          clock_t time_taken;          clock_t time_taken;
2963          clock_t start_time = clock();          clock_t start_time = clock();
2964          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
2965            extra = pcre_study(re, study_options, &error);            {
2966              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2967              }
2968          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2969          if (extra != NULL) free(extra);          if (extra != NULL)
2970              {
2971              PCRE_FREE_STUDY(extra);
2972              }
2973          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
2974            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
2975              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2976          }          }
2977        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2978        if (error != NULL)        if (error != NULL)
2979          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2980        else if (extra != NULL)        else if (extra != NULL)
2981            {
2982          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2983            if (log_store)
2984              {
2985              size_t jitsize;
2986              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
2987                  jitsize != 0)
2988                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2989              }
2990            }
2991        }        }
2992    
2993      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
2994    
2995      if (do_flip)      if (do_mark)
2996        {        {
2997        real_pcre *rre = (real_pcre *)re;        if (extra == NULL)
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
2998          {          {
2999          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3000          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          extra->flags = 0;
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
3001          }          }
3002          extra->mark = &markptr;
3003          extra->flags |= PCRE_EXTRA_MARK;
3004        }        }
3005    
3006      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
3007    
3008      SHOW_INFO:      SHOW_INFO:
3009    
3010      if (do_debug)      if (do_debug)
3011        {        {
3012        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3013        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3014        }        }
3015    
3016        /* We already have the options in get_options (see above) */
3017    
3018      if (do_showinfo)      if (do_showinfo)
3019        {        {
3020        unsigned long int get_options, all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3021        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3022          hascrorlf;          hascrorlf;
3023        int nameentrysize, namecount;        int nameentrysize, namecount;
3024        const uschar *nametable;        const pcre_uint8 *nametable;
3025    
3026        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3027        new_info(re, NULL, PCRE_INFO_SIZE, &size);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3028        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3029        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3030        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3031        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3032        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3033        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3034        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3035        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3036        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3037        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            != 0)
3038            goto SKIP_DATA;
 #if !defined NOINFOCHECK  
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3039    
3040        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3041          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1512  while (!done) Line 3050  while (!done)
3050          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3051          while (namecount-- > 0)          while (namecount-- > 0)
3052            {            {
3053            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3054              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3055              GET2(nametable, 0));  #else
3056              int imm2_size = IMM2_SIZE;
3057    #endif
3058              int length = (int)STRLEN(nametable + imm2_size);
3059              fprintf(outfile, "  ");
3060              PCHARSV(nametable, imm2_size, length, outfile);
3061              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3062    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3063              fprintf(outfile, "%3d\n", use_pcre16?
3064                 (int)(((PCRE_SPTR16)nametable)[0])
3065                :((int)nametable[0] << 8) | (int)nametable[1]);
3066              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3067    #else
3068              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3069    #ifdef SUPPORT_PCRE8
3070            nametable += nameentrysize;            nametable += nameentrysize;
3071    #else
3072              nametable += nameentrysize * 2;
3073    #endif
3074    #endif
3075            }            }
3076          }          }
3077    
# Line 1523  while (!done) Line 3079  while (!done)
3079        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3080    
3081        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
3082        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3083    
3084        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3085          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3086            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3087            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3088            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1539  while (!done) Line 3095  while (!done)
3095            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3096            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3097            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3098            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3099            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3100              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3101              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3102            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3103    
3104        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1581  while (!done) Line 3139  while (!done)
3139          }          }
3140        else        else
3141          {          {
3142          int ch = first_char & 255;          const char *caseless =
3143          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3144            "" : " (caseless)";            "" : " (caseless)";
3145          if (PRINTHEX(ch))  
3146            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3147              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3148          else          else
3149            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3150              fprintf(outfile, "First char = ");
3151              pchar(first_char, outfile);
3152              fprintf(outfile, "%s\n", caseless);
3153              }
3154          }          }
3155    
3156        if (need_char < 0)        if (need_char < 0)
# Line 1596  while (!done) Line 3159  while (!done)
3159          }          }
3160        else        else
3161          {          {
3162          int ch = need_char & 255;          const char *caseless =
3163          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3164            "" : " (caseless)";            "" : " (caseless)";
3165          if (PRINTHEX(ch))  
3166            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3167              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3168          else          else
3169            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3170              fprintf(outfile, "Need char = ");
3171              pchar(need_char, outfile);
3172              fprintf(outfile, "%s\n", caseless);
3173              }
3174          }          }
3175    
3176        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3177        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3178        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3179        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3180          information unless -i or -d was also present. This means that, except
3181          when auto-callouts are involved, the output from runs with and without
3182          -s should be identical. */
3183    
3184        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3185          {          {
3186          if (extra == NULL)          if (extra == NULL)
3187            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3188          else          else
3189            {            {
3190            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3191            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
3192    
3193            if (start_bits == NULL)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3194              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3195            else  
3196              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3197              {              {
3198              int i;              if (start_bits == NULL)
3199              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3200              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3201                {                {
3202                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3203                  int c = 24;
3204                  fprintf(outfile, "Starting byte set: ");
3205                  for (i = 0; i < 256; i++)
3206                  {                  {
3207                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
3208                    {                    {
3209                    fprintf(outfile, "%c ", i);                    if (c > 75)
3210                    c += 2;                      {
3211                    }                      fprintf(outfile, "\n  ");
3212                  else                      c = 2;
3213                    {                      }
3214                    fprintf(outfile, "\\x%02x ", i);                    if (PRINTOK(i) && i != ' ')
3215                    c += 5;                      {
3216                        fprintf(outfile, "%c ", i);
3217                        c += 2;
3218                        }
3219                      else
3220                        {
3221                        fprintf(outfile, "\\x%02x ", i);
3222                        c += 5;
3223                        }
3224                    }                    }
3225                  }                  }
3226                  fprintf(outfile, "\n");
3227                }                }
3228              fprintf(outfile, "\n");              }
3229              }
3230    
3231            /* Show this only if the JIT was set by /S, not by -s. */
3232    
3233            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3234              {
3235              int jit;
3236              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3237                {
3238                if (jit)
3239                  fprintf(outfile, "JIT study was successful\n");
3240                else
3241    #ifdef SUPPORT_JIT
3242                  fprintf(outfile, "JIT study was not successful\n");
3243    #else
3244                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3245    #endif
3246              }              }
3247            }            }
3248          }          }
# Line 1666  while (!done) Line 3261  while (!done)
3261          }          }
3262        else        else
3263          {          {
3264          uschar sbuf[8];          pcre_uint8 sbuf[8];
3265          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3266          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3267          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3268          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3269            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3270          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3271          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3272          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3273          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3274            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3275    
3276          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3277              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1684  while (!done) Line 3280  while (!done)
3280            }            }
3281          else          else
3282            {            {
3283            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3284    
3285              /* If there is study data, write it. */
3286    
3287            if (extra != NULL)            if (extra != NULL)
3288              {              {
3289              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1694  while (!done) Line 3293  while (!done)
3293                  strerror(errno));                  strerror(errno));
3294                }                }
3295              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3296              }              }
3297            }            }
3298          fclose(f);          fclose(f);
3299          }          }
3300    
3301        new_free(re);        new_free(re);
3302        if (extra != NULL) new_free(extra);        if (extra != NULL)
3303        if (tables != NULL) new_free((void *)tables);          {
3304            PCRE_FREE_STUDY(extra);
3305            }
3306          if (locale_set)
3307            {
3308            new_free((void *)tables);
3309            setlocale(LC_CTYPE, "C");
3310            locale_set = 0;
3311            }
3312        continue;  /* With next regex */        continue;  /* With next regex */
3313        }        }
3314      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1711  while (!done) Line 3317  while (!done)
3317    
3318    for (;;)    for (;;)
3319      {      {
3320      uschar *q;      pcre_uint8 *q;
3321      uschar *bptr;      pcre_uint8 *bptr;
3322      int *use_offsets = offsets;      int *use_offsets = offsets;
3323      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3324      int callout_data = 0;      int callout_data = 0;
# Line 1724  while (!done) Line 3330  while (!done)
3330      int getlist = 0;      int getlist = 0;
3331      int gmatched = 0;      int gmatched = 0;
3332      int start_offset = 0;      int start_offset = 0;
3333        int start_offset_sign = 1;
3334      int g_notempty = 0;      int g_notempty = 0;
3335      int use_dfa = 0;      int use_dfa = 0;
3336    
     options = 0;  
   
3337      *copynames = 0;      *copynames = 0;
3338      *getnames = 0;      *getnames = 0;
3339    
3340      copynamesptr = copynames;      cn16ptr = copynames;
3341      getnamesptr = getnames;      gn16ptr = getnames;
3342        cn8ptr = copynames8;
3343        gn8ptr = getnames8;
3344    
3345      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3346      first_callout = 1;      first_callout = 1;
3347        last_callout_mark = NULL;
3348      callout_extra = 0;      callout_extra = 0;
3349      callout_count = 0;      callout_count = 0;
3350      callout_fail_count = 999999;      callout_fail_count = 999999;
3351      callout_fail_id = -1;      callout_fail_id = -1;
3352      show_malloc = 0;      show_malloc = 0;
3353        options = 0;
3354    
3355      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3356        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 1751  while (!done) Line 3360  while (!done)
3360        {        {
3361        if (extend_inputline(infile, buffer + len, "data> ") == NULL)        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3362          {          {
3363          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
3364              {
3365              fprintf(outfile, "\n");
3366              break;
3367              }
3368          done = 1;          done = 1;
3369          goto CONTINUE;          goto CONTINUE;
3370          }          }
# Line 1773  while (!done) Line 3386  while (!done)
3386        int i = 0;        int i = 0;
3387        int n = 0;        int n = 0;
3388    
3389        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3390          In non-UTF mode, allow the value of the byte to fall through to later,
3391          where values greater than 127 are turned into UTF-8 when running in
3392          16-bit mode. */
3393    
3394          if (c != '\\')
3395            {
3396            if (use_utf)
3397              {
3398              *q++ = c;
3399              continue;
3400              }
3401            }
3402    
3403          /* Handle backslash escapes */
3404    
3405          else switch ((c = *p++))
3406          {          {
3407          case 'a': c =    7; break;          case 'a': c =    7; break;
3408          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 1789  while (!done) Line 3418  while (!done)
3418          c -= '0';          c -= '0';
3419          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3420            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3421          break;          break;
3422    
3423          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3424          if (*p == '{')          if (*p == '{')
3425            {            {
3426            unsigned char *pt = p;            pcre_uint8 *pt = p;
3427            c = 0;            c = 0;
3428            while (isxdigit(*(++pt)))  
3429              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3430              when isxdigit() is a macro that refers to its argument more than
3431              once. This is banned by the C Standard, but apparently happens in at
3432              least one MacOS environment. */
3433    
3434              for (pt++; isxdigit(*pt); pt++)
3435                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3436            if (*pt == '}')            if (*pt == '}')
3437              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
3438              p = pt + 1;              p = pt + 1;
3439              break;              break;
3440              }              }
3441            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3442            }            }
 #endif  
3443    
3444          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3445            allows UTF-8 characters to be constructed byte by byte, and also allows
3446            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3447            Otherwise, pass it down to later code so that it can be turned into
3448            UTF-8 when running in 16-bit mode. */
3449    
3450          c = 0;          c = 0;
3451          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3452            {            {
3453            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3454            p++;            p++;
3455            }            }
3456            if (use_utf)
3457              {
3458              *q++ = c;
3459              continue;
3460              }
3461          break;          break;
3462    
3463          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 1852  while (!done) Line 3465  while (!done)
3465          continue;          continue;
3466    
3467          case '>':          case '>':
3468            if (*p == '-')
3469              {
3470              start_offset_sign = -1;
3471              p++;
3472              }
3473          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3474            start_offset *= start_offset_sign;
3475          continue;          continue;
3476    
3477          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1871  while (!done) Line 3490  while (!done)
3490            }            }
3491          else if (isalnum(*p))          else if (isalnum(*p))
3492            {            {
3493            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3494            }            }
3495          else if (*p == '+')          else if (*p == '+')
3496            {            {
# Line 1887  while (!done) Line 3499  while (!done)
3499            }            }
3500          else if (*p == '-')          else if (*p == '-')
3501            {            {
3502            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3503            p++;            p++;
3504            }            }
3505          else if (*p == '!')          else if (*p == '!')
# Line 1925  while (!done) Line 3537  while (!done)
3537  #endif  #endif
3538            use_dfa = 1;            use_dfa = 1;
3539          continue;          continue;
3540    #endif
3541    
3542    #if !defined NODFA
3543          case 'F':          case 'F':
3544          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
3545          continue;          continue;
# Line 1939  while (!done) Line 3553  while (!done)
3553            }            }
3554          else if (isalnum(*p))          else if (isalnum(*p))
3555            {            {
3556            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3557            while (isalnum(*p)) *npp++ = *p++;            }
3558            *npp++ = 0;          continue;
3559            *npp = 0;  
3560            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3561            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3562              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3563            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3564                && extra->executable_jit != NULL)
3565              {
3566              if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3567              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3568              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3569            }            }
3570          continue;          continue;
3571    
# Line 1959  while (!done) Line 3578  while (!done)
3578          continue;          continue;
3579    
3580          case 'N':          case 'N':
3581          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3582              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3583            else
3584              options |= PCRE_NOTEMPTY;
3585          continue;          continue;
3586    
3587          case 'O':          case 'O':
# Line 1982  while (!done) Line 3604  while (!done)
3604          continue;          continue;
3605    
3606          case 'P':          case 'P':
3607          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3608              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3609          continue;          continue;
3610    
3611          case 'Q':          case 'Q':
# Line 2038  while (!done) Line 3661  while (!done)
3661            }            }
3662          continue;          continue;
3663          }          }
3664        *q++ = c;  
3665          /* We now have a character value in c that may be greater than 255. In
3666          16-bit mode, we always convert characters to UTF-8 so that values greater
3667          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3668          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3669          mode must have come from \x{...} or octal constructs because values from
3670          \x.. get this far only in non-UTF mode. */
3671    
3672    #if !defined NOUTF || defined SUPPORT_PCRE16
3673          if (use_pcre16 || use_utf)
3674            {
3675            pcre_uint8 buff8[8];
3676            int ii, utn;
3677            utn = ord2utf8(c, buff8);
3678            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3679            }
3680          else
3681    #endif
3682            {
3683            if (c > 255)
3684              {
3685              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3686                "and UTF-8 mode is not enabled.\n", c);
3687              fprintf(outfile, "** Truncation will probably give the wrong "
3688                "result.\n");
3689              }
3690            *q++ = c;
3691            }
3692        }        }
3693    
3694        /* Reached end of subject string */
3695    
3696      *q = 0;      *q = 0;
3697      len = q - dbuffer;      len = (int)(q - dbuffer);
3698    
3699      /* Move the data to the end of the buffer so that a read over the end of      /* Move the data to the end of the buffer so that a read over the end of
3700      the buffer will be seen by valgrind, even if it doesn't cause a crash. If      the buffer will be seen by valgrind, even if it doesn't cause a crash. If
# Line 2101  while (!done) Line 3754  while (!done)
3754            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3755              {              {
3756              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3757              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3758                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3759              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3760              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3761                {                {
3762                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3763                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3764                  outfile);                  outfile);
3765                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3766                }                }
# Line 2115  while (!done) Line 3768  while (!done)
3768            }            }
3769          }          }
3770        free(pmatch);        free(pmatch);
3771          goto NEXT_DATA;
3772        }        }
3773    
3774    #endif  /* !defined NOPOSIX */
3775    
3776      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3777    
3778      else  #ifdef SUPPORT_PCRE16
3779  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3780          {
3781          len = to16(TRUE, bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3782          switch(len)
3783            {
3784            case -1:
3785            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3786              "converted to UTF-16\n");
3787            goto NEXT_DATA;
3788    
3789            case -2:
3790            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3791              "cannot be converted to UTF-16\n");
3792            goto NEXT_DATA;
3793    
3794            case -3:
3795            fprintf(outfile, "**Failed: character value greater than 0xffff "
3796              "cannot be converted to 16-bit in non-UTF mode\n");
3797            goto NEXT_DATA;
3798    
3799            default:
3800            break;
3801            }
3802          bptr = (pcre_uint8 *)buffer16;
3803          }
3804    #endif
3805    
3806      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3807        {        {
3808          markptr = NULL;
3809    
3810        if (timeitm > 0)        if (timeitm > 0)
3811          {          {
3812          register int i;          register int i;
# Line 2135  while (!done) Line 3818  while (!done)
3818            {            {
3819            int workspace[1000];            int workspace[1000];
3820            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3821              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              {
3822                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3823                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3824                  (sizeof(workspace)/sizeof(int)));
3825                }
3826            }            }
3827          else          else
3828  #endif  #endif
3829    
3830          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3831            count = pcre_exec(re, extra, (char *)bptr, len,            {
3832              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3833                (options | g_notempty), use_offsets, use_size_offsets);
3834              }
3835          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3836          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3837            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2154  while (!done) Line 3840  while (!done)
3840    
3841        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3842        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3843        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3844          running of pcre_exec(), so disable the JIT optimization. This makes it
3845          possible to run the same set of tests with and without JIT externally
3846          requested. */
3847    
3848        if (find_match_limit)        if (find_match_limit)
3849          {          {
# Line 2163  while (!done) Line 3852  while (!done)
3852            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3853            extra->flags = 0;            extra->flags = 0;
3854            }            }
3855            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3856    
3857          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3858            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2186  while (!done) Line 3876  while (!done)
3876            }            }
3877          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3878          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3879          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3880            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3881          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3882          }          }
# Line 2198  while (!done) Line 3888  while (!done)
3888        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3889          {          {
3890          int workspace[1000];          int workspace[1000];
3891          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3892            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
3893            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
3894          if (count == 0)          if (count == 0)
3895            {            {
3896            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2211  while (!done) Line 3901  while (!done)
3901    
3902        else        else
3903          {          {
3904          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3905            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3906          if (count == 0)          if (count == 0)
3907            {            {
3908            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2225  while (!done) Line 3915  while (!done)
3915        if (count >= 0)        if (count >= 0)
3916          {          {
3917          int i, maxcount;          int i, maxcount;
3918            void *cnptr, *gnptr;
3919    
3920  #if !defined NODFA  #if !defined NODFA
3921          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2246  while (!done) Line 3937  while (!done)
3937              }              }
3938            }            }
3939    
3940            /* do_allcaps requests showing of all captures in the pattern, to check
3941            unset ones at the end. */
3942    
3943            if (do_allcaps)
3944              {
3945              if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3946                goto SKIP_DATA;
3947              count++;   /* Allow for full match */
3948              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3949              }
3950    
3951            /* Output the captured substrings */
3952    
3953          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
3954            {            {
3955            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
3956                {
3957                if (use_offsets[i] != -1)
3958                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3959                    use_offsets[i], i);
3960                if (use_offsets[i+1] != -1)
3961                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3962                    use_offsets[i+1], i+1);
3963              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3964                }
3965            else            else
3966              {              {
3967              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3968              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
3969                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3970              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3971              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
3972                {                {
3973                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
3974                  {                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
3975                  fprintf(outfile, " 0+ ");                  outfile);
3976                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
3977                }                }
3978              }              }
3979            }            }
3980    
3981            if (markptr != NULL)
3982              {
3983              fprintf(outfile, "MK: ");
3984              PCHARSV(markptr, 0, -1, outfile);
3985              fprintf(outfile, "\n");
3986              }
3987    
3988          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3989            {            {
3990            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
3991              {              {
3992                int rc;
3993              char copybuffer[256];              char copybuffer[256];
3994              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3995                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
3996              if (rc < 0)              if (rc < 0)
3997                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3998              else              else
3999                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
4000                  fprintf(outfile, "%2dC ", i);
4001                  PCHARSV(copybuffer, 0, rc, outfile);
4002                  fprintf(outfile, " (%d)\n", rc);
4003                  }
4004              }              }
4005            }            }
4006    
4007          for (copynamesptr = copynames;          cnptr = copynames;
4008               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
4009            {            {
4010              int rc;
4011            char copybuffer[256];            char copybuffer[256];
4012            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
4013              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
4014                {
4015                if (*(pcre_uint16 *)cnptr == 0) break;
4016                }
4017              else
4018                {
4019                if (*(pcre_uint8 *)cnptr == 0) break;
4020                }
4021    
4022              PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4023                cnptr, copybuffer, sizeof(copybuffer));
4024    
4025            if (rc < 0)            if (rc < 0)
4026              fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);              {
4027                fprintf(outfile, "copy substring ");
4028                PCHARSV(cnptr, 0, -1, outfile);
4029                fprintf(outfile, " failed %d\n", rc);
4030                }
4031            else            else
4032              fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);              {
4033                fprintf(outfile, "  C ");
4034                PCHARSV(copybuffer, 0, rc, outfile);
4035                fprintf(outfile, " (%d) ", rc);
4036                PCHARSV(cnptr, 0, -1, outfile);
4037                putc('\n', outfile);
4038                }
4039    
4040              cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4041            }            }
4042    
4043          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4044            {            {
4045            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
4046              {              {
4047                int rc;
4048              const char *substring;              const char *substring;
4049              int rc = pcre_get_substring((char *)bptr, use_offsets, count,              PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
               i, &substring);  
4050              if (rc < 0)              if (rc < 0)
4051                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
4052              else              else
4053                {                {
4054                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG ", i);
4055                pcre_free_substring(substring);                PCHARSV(substring, 0, rc, outfile);
4056                  fprintf(outfile, " (%d)\n", rc);
4057                  PCRE_FREE_SUBSTRING(substring);
4058                }                }
4059              }              }
4060            }            }
4061    
4062          for (getnamesptr = getnames;          gnptr = getnames;
4063               *getnamesptr != 0;          for (;;)
              getnamesptr += (int)strlen((char*)getnamesptr) + 1)  
4064            {            {
4065              int rc;
4066            const char *substring;            const char *substring;
4067            int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,  
4068              count, (char *)getnamesptr, &substring);            if (use_pcre16)
4069                {
4070                if (*(pcre_uint16 *)gnptr == 0) break;
4071                }
4072              else
4073                {
4074                if (*(pcre_uint8 *)gnptr == 0) break;
4075                }
4076    
4077              PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4078                gnptr, &substring);
4079            if (rc < 0)            if (rc < 0)
4080              fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);              {
4081                fprintf(outfile, "get substring ");
4082                PCHARSV(gnptr, 0, -1, outfile);
4083                fprintf(outfile, " failed %d\n", rc);
4084                }
4085            else            else
4086              {              {
4087              fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);              fprintf(outfile, "  G ");
4088              pcre_free_substring(substring);              PCHARSV(substring, 0, rc, outfile);
4089                fprintf(outfile, " (%d) ", rc);
4090                PCHARSV(gnptr, 0, -1, outfile);
4091                PCRE_FREE_SUBSTRING(substring);
4092                putc('\n', outfile);
4093              }              }
4094    
4095              gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4096            }            }
4097    
4098          if (getlist)          if (getlist)
4099            {            {
4100              int rc;
4101            const char **stringlist;            const char **stringlist;
4102            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,            PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
             &stringlist);  
4103            if (rc < 0)            if (rc < 0)
4104              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
4105            else            else
4106              {              {
4107              for (i = 0; i < count; i++)              for (i = 0; i < count; i++)
4108                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                {
4109                  fprintf(outfile, "%2dL ", i);
4110                  PCHARSV(stringlist[i], 0, -1, outfile);
4111                  putc('\n', outfile);
4112                  }
4113              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
4114                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
4115              /* free((void *)stringlist); */              PCRE_FREE_SUBSTRING_LIST(stringlist);
             pcre_free_substring_list(stringlist);  
4116              }              }
4117            }            }
4118          }          }
# Line 2352  while (!done) Line 4121  while (!done)
4121    
4122        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
4123          {          {
4124          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
4125  #if !defined NODFA          else
4126          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)            {
4127            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            fprintf(outfile, "Partial match, mark=");
4128              bptr + use_offsets[0]);            PCHARSV(markptr, 0, -1, outfile);
4129  #endif            }
4130            if (use_size_offsets > 1)
4131              {
4132              fprintf(outfile, ": ");
4133              PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4134                outfile);
4135              }
4136          fprintf(outfile, "\n");          fprintf(outfile, "\n");
4137          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
4138          }          }
# Line 2367  while (!done) Line 4142  while (!done)
4142        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
4143        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
4144    
4145        Complication arises in the case when the newline option is "any" or        Complication arises in the case when the newline convention is "any",
4146        "anycrlf". If the previous match was at the end of a line terminated by        "crlf", or "anycrlf". If the previous match was at the end of a line
4147        CRLF, an advance of one character just passes the \r, whereas we should        terminated by CRLF, an advance of one character just passes the \r,
4148        prefer the longer newline sequence, as does the code in pcre_exec().        whereas we should prefer the longer newline sequence, as does the code in
4149        Fudge the offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
4150          newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4151          find the default.
4152    
4153        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
4154        character, not one byte. */        character, not one byte. */
# Line 2386  while (!done) Line 4163  while (!done)
4163            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
4164              {              {
4165              int d;              int d;
4166              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4167              /* Note that these values are always the ASCII ones, even in              /* Note that these values are always the ASCII ones, even in
4168              EBCDIC environments. CR = 13, NL = 10. */              EBCDIC environments. CR = 13, NL = 10. */
4169              obits = (d == 13)? PCRE_NEWLINE_CR :              obits = (d == 13)? PCRE_NEWLINE_CR :
# Line 2396  while (!done) Line 4173  while (!done)
4173                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
4174              }              }
4175            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4176                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4177                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4178                &&                &&
4179                start_offset < len - 1 &&                start_offset < len - 1 &&
4180                bptr[start_offset] == '\r' &&  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4181                bptr[start_offset+1] == '\n')                (use_pcre16?
4182                       ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4183                    && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4184                  :
4185                       bptr[start_offset] == '\r'
4186                    && bptr[start_offset + 1] == '\n')
4187    #elif defined SUPPORT_PCRE16
4188                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4189                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4190    #else
4191                     bptr[start_offset] == '\r'
4192                  && bptr[start_offset + 1] == '\n'
4193    #endif
4194                  )
4195              onechar++;              onechar++;
4196            else if (use_utf8)            else if (use_utf)
4197              {              {
4198              while (start_offset + onechar < len)              while (start_offset + onechar < len)
4199                {                {
4200                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4201                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
4202                }                }
4203              }              }
4204            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
4205            }            }
4206          else          else
4207            {            {
4208            if (count == PCRE_ERROR_NOMATCH)            switch(count)
4209              {              {
4210              if (gmatched == 0) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
4211                if (gmatched == 0)
4212                  {
4213                  if (markptr == NULL)
4214                    {
4215                    fprintf(outfile, "No match\n");
4216                    }
4217                  else
4218                    {
4219                    fprintf(outfile, "No match, mark = ");
4220             &