/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 689 by ph10, Fri Sep 9 10:34:57 2011 UTC revision 936 by ph10, Sat Feb 25 17:02:23 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 59  POSSIBILITY OF SUCH DAMAGE.
59  #include <locale.h>  #include <locale.h>
60  #include <errno.h>  #include <errno.h>
61    
62  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
63    original patch uses readline/readline.h for libedit, but in at least one system
64    it is installed as editline/readline.h, so the configuration code now looks for
65    that first, falling back to readline/readline.h. */
66    
67    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
68  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
69  #include <unistd.h>  #include <unistd.h>
70  #endif  #endif
71    #if defined(SUPPORT_LIBREADLINE)
72  #include <readline/readline.h>  #include <readline/readline.h>
73  #include <readline/history.h>  #include <readline/history.h>
74    #else
75    #if defined(HAVE_EDITLINE_READLINE_H)
76    #include <editline/readline.h>
77    #else
78    #include <readline/readline.h>
79    #endif
80    #endif
81  #endif  #endif
   
82    
83  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
84  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 94  input mode under Windows. */ Line 117  input mode under Windows. */
117  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
118  #endif  #endif
119    
120    #define PRIV(name) name
121    
122  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
123  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 105  here before pcre_internal.h so that the Line 129  here before pcre_internal.h so that the
129  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
130    
131  #include "pcre.h"  #include "pcre.h"
132    
133    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
134    /* Configure internal macros to 16 bit mode. */
135    #define COMPILE_PCRE16
136    #endif
137    
138  #include "pcre_internal.h"  #include "pcre_internal.h"
139    
140    /* The pcre_printint() function, which prints the internal form of a compiled
141    regex, is held in a separate file so that (a) it can be compiled in either
142    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
143    when that is compiled in debug mode. */
144    
145    #ifdef SUPPORT_PCRE8
146    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
147    #endif
148    #ifdef SUPPORT_PCRE16
149    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
150    #endif
151    
152  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
153  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
154  external symbols to prevent clashes. */  external symbols to prevent clashes. */
155    
156  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
 #define _pcre_ucp_typerange    ucp_typerange  
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utf8_char_sizes  utf8_char_sizes  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
157    
158  #include "pcre_tables.c"  #include "pcre_tables.c"
159    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
160  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
161  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
162  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
163  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
164  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
165    
166    #ifdef EBCDIC
167    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
168    #else
169    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
170    #endif
171    
172    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
173    
174  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* Posix support is disabled in 16 bit only mode. */
175    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
176    #define NOPOSIX
177    #endif
178    
179  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
180  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 150  Makefile. */ Line 184  Makefile. */
184  #include "pcreposix.h"  #include "pcreposix.h"
185  #endif  #endif
186    
187  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
188  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
189  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
190  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
191  UTF8 support if PCRE is built without it. */  
192    #ifndef SUPPORT_UTF
193  #ifndef SUPPORT_UTF8  #ifndef NOUTF
194  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
195  #endif  #endif
196  #endif  #endif
197    
198    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
199    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
200    only from one place and is handled differently). I couldn't dream up any way of
201    using a single macro to do this in a generic way, because of the many different
202    argument requirements. We know that at least one of SUPPORT_PCRE8 and
203    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
204    use these in the definitions of generic macros.
205    
206    **** Special note about the PCHARSxxx macros: the address of the string to be
207    printed is always given as two arguments: a base address followed by an offset.
208    The base address is cast to the correct data size for 8 or 16 bit data; the
209    offset is in units of this size. If the string were given as base+offset in one
210    argument, the casting might be incorrectly applied. */
211    
212    #ifdef SUPPORT_PCRE8
213    
214    #define PCHARS8(lv, p, offset, len, f) \
215      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
216    
217    #define PCHARSV8(p, offset, len, f) \
218      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
219    
220    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
221      p = read_capture_name8(p, cn8, re)
222    
223    #define STRLEN8(p) ((int)strlen((char *)p))
224    
225    #define SET_PCRE_CALLOUT8(callout) \
226      pcre_callout = callout
227    
228    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
229       pcre_assign_jit_stack(extra, callback, userdata)
230    
231    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
232      re = pcre_compile((char *)pat, options, error, erroffset, tables)
233    
234    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
235        namesptr, cbuffer, size) \
236      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
237        (char *)namesptr, cbuffer, size)
238    
239    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
240      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
241    
242    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
243        offsets, size_offsets, workspace, size_workspace) \
244      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
245        offsets, size_offsets, workspace, size_workspace)
246    
247    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
248        offsets, size_offsets) \
249      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
250        offsets, size_offsets)
251    
252    #define PCRE_FREE_STUDY8(extra) \
253      pcre_free_study(extra)
254    
255    #define PCRE_FREE_SUBSTRING8(substring) \
256      pcre_free_substring(substring)
257    
258    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
259      pcre_free_substring_list(listptr)
260    
261    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
262        getnamesptr, subsptr) \
263      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
264        (char *)getnamesptr, subsptr)
265    
266    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
267      n = pcre_get_stringnumber(re, (char *)ptr)
268    
269    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
270      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
271    
272    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
273      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
274    
275    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
276      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
277    
278    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
279      pcre_printint(re, outfile, debug_lengths)
280    
281    #define PCRE_STUDY8(extra, re, options, error) \
282      extra = pcre_study(re, options, error)
283    
284    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
285      pcre_jit_stack_alloc(startsize, maxsize)
286    
287    #define PCRE_JIT_STACK_FREE8(stack) \
288      pcre_jit_stack_free(stack)
289    
290    #endif /* SUPPORT_PCRE8 */
291    
292    /* -----------------------------------------------------------*/
293    
294    #ifdef SUPPORT_PCRE16
295    
296    #define PCHARS16(lv, p, offset, len, f) \
297      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
298    
299    #define PCHARSV16(p, offset, len, f) \
300      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
301    
302    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
303      p = read_capture_name16(p, cn16, re)
304    
305    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
306    
307    #define SET_PCRE_CALLOUT16(callout) \
308      pcre16_callout = (int (*)(pcre16_callout_block *))callout
309    
310    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
311      pcre16_assign_jit_stack((pcre16_extra *)extra, \
312        (pcre16_jit_callback)callback, userdata)
313    
314    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
315      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
316        tables)
317    
318    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
319        namesptr, cbuffer, size) \
320      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
321        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
322    
323    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
324      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
325        (PCRE_UCHAR16 *)cbuffer, size/2)
326    
327    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
328        offsets, size_offsets, workspace, size_workspace) \
329      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
330        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
331        workspace, size_workspace)
332    
333    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
334        offsets, size_offsets) \
335      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
336        len, start_offset, options, offsets, size_offsets)
337    
338    #define PCRE_FREE_STUDY16(extra) \
339      pcre16_free_study((pcre16_extra *)extra)
340    
341    #define PCRE_FREE_SUBSTRING16(substring) \
342      pcre16_free_substring((PCRE_SPTR16)substring)
343    
344    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
345      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
346    
347    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
348        getnamesptr, subsptr) \
349      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
350        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
351    
352    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
353      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
354    
355    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
356      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
357        (PCRE_SPTR16 *)(void*)subsptr)
358    
359    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
360      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
361        (PCRE_SPTR16 **)(void*)listptr)
362    
363    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
364      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
365        tables)
366    
367    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
368      pcre16_printint(re, outfile, debug_lengths)
369    
370    #define PCRE_STUDY16(extra, re, options, error) \
371      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
372    
373    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
374      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
375    
376    #define PCRE_JIT_STACK_FREE16(stack) \
377      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
378    
379    #endif /* SUPPORT_PCRE16 */
380    
381    
382    /* ----- Both modes are supported; a runtime test is needed, except for
383    pcre_config(), and the JIT stack functions, when it doesn't matter which
384    version is called. ----- */
385    
386    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
387    
388    #define CHAR_SIZE (use_pcre16? 2:1)
389    
390    #define PCHARS(lv, p, offset, len, f) \
391      if (use_pcre16) \
392        PCHARS16(lv, p, offset, len, f); \
393      else \
394        PCHARS8(lv, p, offset, len, f)
395    
396    #define PCHARSV(p, offset, len, f) \
397      if (use_pcre16) \
398        PCHARSV16(p, offset, len, f); \
399      else \
400        PCHARSV8(p, offset, len, f)
401    
402    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
403      if (use_pcre16) \
404        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
405      else \
406        READ_CAPTURE_NAME8(p, cn8, cn16, re)
407    
408    #define SET_PCRE_CALLOUT(callout) \
409      if (use_pcre16) \
410        SET_PCRE_CALLOUT16(callout); \
411      else \
412        SET_PCRE_CALLOUT8(callout)
413    
414    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
415    
416    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
417      if (use_pcre16) \
418        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
419      else \
420        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
421    
422    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
423      if (use_pcre16) \
424        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
425      else \
426        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
427    
428    #define PCRE_CONFIG pcre_config
429    
430    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
431        namesptr, cbuffer, size) \
432      if (use_pcre16) \
433        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
434          namesptr, cbuffer, size); \
435      else \
436        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
437          namesptr, cbuffer, size)
438    
439    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
440      if (use_pcre16) \
441        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
442      else \
443        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
444    
445    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
446        offsets, size_offsets, workspace, size_workspace) \
447      if (use_pcre16) \
448        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
449          offsets, size_offsets, workspace, size_workspace); \
450      else \
451        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
452          offsets, size_offsets, workspace, size_workspace)
453    
454    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
455        offsets, size_offsets) \
456      if (use_pcre16) \
457        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
458          offsets, size_offsets); \
459      else \
460        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
461          offsets, size_offsets)
462    
463    #define PCRE_FREE_STUDY(extra) \
464      if (use_pcre16) \
465        PCRE_FREE_STUDY16(extra); \
466      else \
467        PCRE_FREE_STUDY8(extra)
468    
469    #define PCRE_FREE_SUBSTRING(substring) \
470      if (use_pcre16) \
471        PCRE_FREE_SUBSTRING16(substring); \
472      else \
473        PCRE_FREE_SUBSTRING8(substring)
474    
475    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
476      if (use_pcre16) \
477        PCRE_FREE_SUBSTRING_LIST16(listptr); \
478      else \
479        PCRE_FREE_SUBSTRING_LIST8(listptr)
480    
481    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
482        getnamesptr, subsptr) \
483      if (use_pcre16) \
484        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
485          getnamesptr, subsptr); \
486      else \
487        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
488          getnamesptr, subsptr)
489    
490    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
491      if (use_pcre16) \
492        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
493      else \
494        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
495    
496    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
497      if (use_pcre16) \
498        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
499      else \
500        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
501    
502    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
503      if (use_pcre16) \
504        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
505      else \
506        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
507    
508    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
509      (use_pcre16 ? \
510         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
511        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
512    
513    #define PCRE_JIT_STACK_FREE(stack) \
514      if (use_pcre16) \
515        PCRE_JIT_STACK_FREE16(stack); \
516      else \
517        PCRE_JIT_STACK_FREE8(stack)
518    
519    #define PCRE_MAKETABLES \
520      (use_pcre16? pcre16_maketables() : pcre_maketables())
521    
522    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
523      if (use_pcre16) \
524        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
525      else \
526        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
527    
528    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
529      if (use_pcre16) \
530        PCRE_PRINTINT16(re, outfile, debug_lengths); \
531      else \
532        PCRE_PRINTINT8(re, outfile, debug_lengths)
533    
534    #define PCRE_STUDY(extra, re, options, error) \
535      if (use_pcre16) \
536        PCRE_STUDY16(extra, re, options, error); \
537      else \
538        PCRE_STUDY8(extra, re, options, error)
539    
540    /* ----- Only 8-bit mode is supported ----- */
541    
542    #elif defined SUPPORT_PCRE8
543    #define CHAR_SIZE                 1
544    #define PCHARS                    PCHARS8
545    #define PCHARSV                   PCHARSV8
546    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
547    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
548    #define STRLEN                    STRLEN8
549    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
550    #define PCRE_COMPILE              PCRE_COMPILE8
551    #define PCRE_CONFIG               pcre_config
552    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
553    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
554    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
555    #define PCRE_EXEC                 PCRE_EXEC8
556    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
557    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
558    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
559    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
560    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
561    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
562    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
563    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
564    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
565    #define PCRE_MAKETABLES           pcre_maketables()
566    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
567    #define PCRE_PRINTINT             PCRE_PRINTINT8
568    #define PCRE_STUDY                PCRE_STUDY8
569    
570    /* ----- Only 16-bit mode is supported ----- */
571    
572    #else
573    #define CHAR_SIZE                 2
574    #define PCHARS                    PCHARS16
575    #define PCHARSV                   PCHARSV16
576    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
577    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
578    #define STRLEN                    STRLEN16
579    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
580    #define PCRE_COMPILE              PCRE_COMPILE16
581    #define PCRE_CONFIG               pcre16_config
582    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
583    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
584    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
585    #define PCRE_EXEC                 PCRE_EXEC16
586    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
587    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
588    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
589    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
590    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
591    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
592    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
593    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
594    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
595    #define PCRE_MAKETABLES           pcre16_maketables()
596    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
597    #define PCRE_PRINTINT             PCRE_PRINTINT16
598    #define PCRE_STUDY                PCRE_STUDY16
599    #endif
600    
601    /* ----- End of mode-specific function call macros ----- */
602    
603    
604  /* Other parameters */  /* Other parameters */
605    
# Line 187  static int callout_fail_count; Line 625  static int callout_fail_count;
625  static int callout_fail_id;  static int callout_fail_id;
626  static int debug_lengths;  static int debug_lengths;
627  static int first_callout;  static int first_callout;
628    static int jit_was_used;
629  static int locale_set = 0;  static int locale_set = 0;
630  static int show_malloc;  static int show_malloc;
631  static int use_utf8;  static int use_utf;
632  static size_t gotten_store;  static size_t gotten_store;
633    static size_t first_gotten_store = 0;
634  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
635    
636  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
637    
638  static int buffer_size = 50000;  static int buffer_size = 50000;
639  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
640  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
641  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
642    
643    /* Another buffer is needed for translation to 16-bit character strings. It will
644    be obtained and extended as required. */
645    
646    #ifdef SUPPORT_PCRE16
647    static int buffer16_size = 0;
648    static pcre_uint16 *buffer16 = NULL;
649    
650    #ifdef SUPPORT_PCRE8
651    
652    /* We need the table of operator lengths that is used for 16-bit compiling, in
653    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
654    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
655    appropriately for the 16-bit world. Just as a safety check, make sure that
656    COMPILE_PCRE16 is *not* set. */
657    
658    #ifdef COMPILE_PCRE16
659    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
660    #endif
661    
662    #if LINK_SIZE == 2
663    #undef LINK_SIZE
664    #define LINK_SIZE 1
665    #elif LINK_SIZE == 3 || LINK_SIZE == 4
666    #undef LINK_SIZE
667    #define LINK_SIZE 2
668    #else
669    #error LINK_SIZE must be either 2, 3, or 4
670    #endif
671    
672    #undef IMM2_SIZE
673    #define IMM2_SIZE 1
674    
675    #endif /* SUPPORT_PCRE8 */
676    
677    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
678    #endif  /* SUPPORT_PCRE16 */
679    
680    /* If we have 8-bit support, default use_pcre16 to false; if there is also
681    16-bit support, it can be changed by an option. If there is no 8-bit support,
682    there must be 16-bit support, so default it to 1. */
683    
684    #ifdef SUPPORT_PCRE8
685    static int use_pcre16 = 0;
686    #else
687    static int use_pcre16 = 1;
688    #endif
689    
690    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
691    
692    static int jit_study_bits[] =
693      {
694      PCRE_STUDY_JIT_COMPILE,
695      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
696      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
697      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
698      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
699      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
700      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
701        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
702    };
703    
704  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
705    
# Line 213  static const char *errtexts[] = { Line 714  static const char *errtexts[] = {
714    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
715    "match limit exceeded",    "match limit exceeded",
716    "callout error code",    "callout error code",
717    NULL,  /* BADUTF8 is handled specially */    NULL,  /* BADUTF8/16 is handled specially */
718    "bad UTF-8 offset",    NULL,  /* BADUTF8/16 offset is handled specially */
719    NULL,  /* PARTIAL is handled specially */    NULL,  /* PARTIAL is handled specially */
720    "not used - internal error",    "not used - internal error",
721    "internal error - pattern overwritten?",    "internal error - pattern overwritten?",
# Line 228  static const char *errtexts[] = { Line 729  static const char *errtexts[] = {
729    "not used - internal error",    "not used - internal error",
730    "invalid combination of newline options",    "invalid combination of newline options",
731    "bad offset value",    "bad offset value",
732    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
733    "nested recursion at the same subject position",    "nested recursion at the same subject position",
734    "JIT stack limit reached"    "JIT stack limit reached",
735      "pattern compiled in wrong mode: 8-bit/16-bit error"
736  };  };
737    
738    
# Line 246  the L (locale) option also adjusts the t Line 748  the L (locale) option also adjusts the t
748  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
749  only ASCII characters. */  only ASCII characters. */
750    
751  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
752    
753  /* This table is a lower casing table. */  /* This table is a lower casing table. */
754    
# Line 419  graph, print, punct, and cntrl. Other cl Line 921  graph, print, punct, and cntrl. Other cl
921  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
922  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
923    
924  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
925  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
926  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
927  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 588  return sys_errlist[n]; Line 1090  return sys_errlist[n];
1090    
1091  static pcre_jit_stack* jit_callback(void *arg)  static pcre_jit_stack* jit_callback(void *arg)
1092  {  {
      /* Records in jit_was_used that this callback ran, then returns the
      caller-supplied arg unchanged, cast to a JIT stack pointer. */
1093    jit_was_used = TRUE;
1094  return (pcre_jit_stack *)arg;  return (pcre_jit_stack *)arg;
1095  }  }
1096    
1097    
1098    #if !defined NOUTF || defined SUPPORT_PCRE16
1099    /*************************************************
1100    *            Convert UTF-8 string to value       *
1101    *************************************************/
1102    
1103    /* This function takes one or more bytes that represents a UTF-8 character,
1104    and returns the value of the character.
1105    
1106    Argument:
1107      utf8bytes   a pointer to the byte vector
1108      vptr        a pointer to an int to receive the value
1109    
1110    Returns:      >  0 => the number of bytes consumed
1111                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1112    */
1113    
1114    static int
1115    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1116    {
1117    int c = *utf8bytes++;
1118    int d = c;
1119    int i, j, s;
1120    
        /* Shift a copy of the lead byte left until its top bit is clear. On exit,
        i is one less than the number of leading 1 bits in the lead byte. */
1121    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1122      {
1123      if ((d & 0x80) == 0) break;
1124      d <<= 1;
1125      }
1126    
        /* i == 0 means the lead byte is 10xxxxxx (a continuation byte on its own);
        i == 6 means the loop never broke, i.e. the lead byte is 0xfe or 0xff. */
1127    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1128    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1129    
1130    /* i now has a value in the range 1-5 */
1131    
        /* s is the bit position of the lead byte payload in the result. The
        utf8_table3 mask is defined elsewhere; presumably it strips the length
        marker bits from the lead byte - TODO confirm. */
1132    s = 6*i;
1133    d = (c & utf8_table3[i]) << s;
1134    
        /* Each additional byte must be 10xxxxxx and contributes 6 bits. A bad byte
        is reported as the negative of its offset from the start of the sequence. */
1135    for (j = 0; j < i; j++)
1136      {
1137      c = *utf8bytes++;
1138      if ((c & 0xc0) != 0x80) return -(j+1);
1139      s -= 6;
1140      d |= (c & 0x3f) << s;
1141      }
1142    
1143    /* Check that encoding was the correct unique one */
1144    
        /* j ends as the index of the first utf8_table1 entry that is >= d. It must
        equal i, otherwise the value was encoded with more bytes than needed
        (assumes utf8_table1[k] is the largest value that fits in k additional
        bytes - TODO confirm against the table definition). */
1145    for (j = 0; j < utf8_table1_size; j++)
1146      if (d <= utf8_table1[j]) break;
1147    if (j != i) return -(i+1);
1148    
1149    /* Valid value */
1150    
        /* *vptr is written only on the two success paths (here and for ASCII). */
1151    *vptr = d;
1152    return i+1;
1153    }
1154    #endif /* NOUTF || SUPPORT_PCRE16 */
1155    
1156    
1157    
1158    #if !defined NOUTF || defined SUPPORT_PCRE16
1159    /*************************************************
1160    *       Convert character value to UTF-8         *
1161    *************************************************/
1162    
1163    /* This function takes an integer value in the range 0 - 0x7fffffff
1164    and encodes it as a UTF-8 character in 1 to 6 bytes.
1165    
1166    Arguments:
1167      cvalue     the character value
1168      utf8bytes  pointer to buffer for result - at least 6 bytes long
1169    
1170    Returns:     number of bytes placed in the buffer
1171    */
1172    
1173    static int
1174    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1175    {
1176    register int i, j;
        /* Find i, the index of the first utf8_table1 entry that is >= cvalue. It
        is used below as the number of continuation bytes to emit. */
1177    for (i = 0; i < utf8_table1_size; i++)
1178      if (cvalue <= utf8_table1[i]) break;
        /* Write the continuation bytes from the last one backwards, taking the low
        6 bits of cvalue each time and tagging them with 0x80. */
1179    utf8bytes += i;
1180    for (j = i; j > 0; j--)
1181     {
1182     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1183     cvalue >>= 6;
1184     }
        /* utf8bytes is now back at the first byte. What is left of cvalue is
        combined with utf8_table2[i] (defined elsewhere; presumably the lead-byte
        length marker - TODO confirm). The result is i + 1 bytes long. */
1185    *utf8bytes = utf8_table2[i] | cvalue;
1186    return i + 1;
1187    }
1188    #endif
1189    
1190    
1191    #ifdef SUPPORT_PCRE16
1192    /*************************************************
1193    *         Convert a string to 16-bit             *
1194    *************************************************/
1195    
1196    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1197    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1198    double, because a value up to 0xffff uses at least 1 byte in UTF-8 and exactly
1199    2 bytes in UTF-16. Higher values use 4 bytes in UTF-8 and 4 bytes in UTF-16. The
1200    result is always left in buffer16.
1201    
1202    Note that this function does not object to surrogate values. This is
1203    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1204    for the purpose of testing that they are correctly faulted.
1205    
1206    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1207    in UTF-8 so that values greater than 255 can be handled.
1208    
1209    Arguments:
1210      data       TRUE if converting a data line; FALSE for a regex
1211      p          points to a byte string
1212      utf        true if UTF-8 (to be converted to UTF-16)
1213      len        number of bytes in the string (excluding trailing zero)
1214    
1215    Returns:     number of 16-bit data items used (excluding trailing zero)
1216                 OR -1 if a UTF-8 string is malformed
1217                 OR -2 if a value > 0x10ffff is encountered
1218                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1219    */
1220    
1221    static int
1222    to16(int data, pcre_uint8 *p, int utf, int len)
1223    {
1224    pcre_uint16 *pp;
1225    
        /* buffer16_size is used as a byte count (it is what malloc receives).
        2*len + 2 bytes is room for len 16-bit units plus a terminating zero,
        assuming pcre_uint16 is two bytes wide. The old buffer is discarded, not
        copied, when a bigger one is needed. */
1226    if (buffer16_size < 2*len + 2)
1227      {
1228      if (buffer16 != NULL) free(buffer16);
1229      buffer16_size = 2*len + 2;
1230      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1231      if (buffer16 == NULL)
1232        {
1233        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1234        exit(1);
1235        }
1236      }
1237    
1238    pp = buffer16;
1239    
        /* A non-UTF regex is not decoded: each byte is widened to one 16-bit unit. */
1240    if (!utf && !data)
1241      {
1242      while (len-- > 0) *pp++ = *p++;
1243      }
1244    
        /* Data lines, and regexes in UTF mode, are decoded as UTF-8 one character
        at a time. */
1245    else
1246      {
1247      int c = 0;
1248      while (len > 0)
1249        {
1250        int chlen = utf82ord(p, &c);
1251        if (chlen <= 0) return -1;
1252        if (c > 0x10ffff) return -2;
1253        p += chlen;
1254        len -= chlen;
            /* Values below 0x10000 are stored as a single unit with no check for
            surrogates. Larger values are only representable in UTF mode, as a
            high/low surrogate pair. */
1255        if (c < 0x10000) *pp++ = c; else
1256          {
1257          if (!utf) return -3;
1258          c -= 0x10000;
1259          *pp++ = 0xD800 | (c >> 10);
1260          *pp++ = 0xDC00 | (c & 0x3ff);
1261          }
1262        }
1263      }
1264    
        /* Terminate the result; the returned count excludes the terminator. */
1265    *pp = 0;
1266    return pp - buffer16;
1267    }
1268    #endif
1269    
1270    
1271  /*************************************************  /*************************************************
1272  *        Read or extend an input line            *  *        Read or extend an input line            *
1273  *************************************************/  *************************************************/
# Line 615  Returns:       pointer to the start of n Line 1291  Returns:       pointer to the start of n
1291                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1292  */  */
1293    
1294  static uschar *  static pcre_uint8 *
1295  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1296  {  {
1297  uschar *here = start;  pcre_uint8 *here = start;
1298    
1299  for (;;)  for (;;)
1300    {    {
1301    int rlen = (int)(buffer_size - (here - buffer));    size_t rlen = (size_t)(buffer_size - (here - buffer));
1302    
1303    if (rlen > 1000)    if (rlen > 1000)
1304      {      {
1305      int dlen;      int dlen;
1306    
1307      /* If libreadline support is required, use readline() to read a line if the      /* If libreadline or libedit support is required, use readline() to read a
1308      input is a terminal. Note that readline() removes the trailing newline, so      line if the input is a terminal. Note that readline() removes the trailing
1309      we must put it back again, to be compatible with fgets(). */      newline, so we must put it back again, to be compatible with fgets(). */
1310    
1311  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1312      if (isatty(fileno(f)))      if (isatty(fileno(f)))
1313        {        {
1314        size_t len;        size_t len;
# Line 665  for (;;) Line 1341  for (;;)
1341    else    else
1342      {      {
1343      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1344      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1345      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1346      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1347    
1348      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1349        {        {
# Line 698  return NULL;  /* Control never gets here Line 1374  return NULL;  /* Control never gets here
1374    
1375    
1376    
   
   
   
   
1377  /*************************************************  /*************************************************
1378  *          Read number from string               *  *          Read number from string               *
1379  *************************************************/  *************************************************/
# Line 718  Returns:        the unsigned long Line 1390  Returns:        the unsigned long
1390  */  */
1391    
1392  static int  static int
1393  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1394  {  {
1395  int result = 0;  int result = 0;
1396  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 729  return(result); Line 1401  return(result);
1401    
1402    
1403    
   
1404  /*************************************************  /*************************************************
1405  *            Convert UTF-8 string to value       *  *             Print one character                *
1406  *************************************************/  *************************************************/
1407    
1408  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
1409    
1410  Returns:      >  0 => the number of bytes consumed  static int pchar(int c, FILE *f)
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
  
 #if !defined NOUTF8  
  
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1411  {  {
      /* New revision (pchar): the value returned is the number of output
      characters that the chosen representation occupies. Nothing is written
      when f is NULL, but the same count is returned. */
1412  int c = *utf8bytes++;  if (PRINTOK(c))
1413  int d = c;    {
1414  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1415      return 1;
1416      }
1417    
      /* Values below 0x100 that are not printed literally: a braced two-digit
      escape (6 characters) when use_utf is set, otherwise an unbraced two-digit
      escape (4 characters). */
1418  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1419    {    {
1420    if ((d & 0x80) == 0) break;    if (use_utf)
1421    d <<= 1;      {
1422        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1423        return 6;
1424        }
1425      else
1426        {
1427        if (f != NULL) fprintf(f, "\\x%02x", c);
1428        return 4;
1429        }
1430    }    }
1431    
      /* Here c >= 0x100, so the escape has at least three hex digits and the
      first arm of the conditional below (c <= 0xff) cannot be taken. The count
      is 4 characters of punctuation plus the number of hex digits. */
1432  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1433  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1434           (c <= 0x00000fff)? 7 :
1435           (c <= 0x0000ffff)? 8 :
1436           (c <= 0x000fffff)? 9 : 10;
1437    }
1438    
 /* i now has a value in the range 1-5 */  
1439    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1440    
1441  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1442    {  /*************************************************
1443    c = *utf8bytes++;  *         Print 8-bit character string           *
1444    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1445    
1446  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1447    If handed a NULL file, just counts chars without printing. */
1448    
1449  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1450    if (d <= utf8_table1[j]) break;  {
1451  if (j != i) return -(i+1);  int c = 0;
1452    int yield = 0;
1453    
      /* New revision (pchars): a negative length means p is zero-terminated and
      its length is measured here. */
1454  /* Valid value */  if (length < 0)
1455      length = strlen((char *)p);
1456    
1457  *vptr = d;  while (length-- > 0)
1458  return i+1;    {
1459  }  #if !defined NOUTF
1460      if (use_utf)
1461        {
1462        int rc = utf82ord(p, &c);
1463        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1464          {
              /* The loop test has already counted one byte of this character,
              so only the remaining rc - 1 bytes are subtracted. */
1465          length -= rc - 1;
1466          p += rc;
1467          yield += pchar(c, f);
1468          continue;
1469          }
1470        }
1471    #endif
        /* Reached when not in UTF mode, or when utf82ord rejected the bytes or
        the sequence would extend past length: output the single byte as is. */
1472      c = *p++;
1473      yield += pchar(c, f);
1474      }
1475    
1476    return yield;
1477    }
1478  #endif  #endif
1479    
1480    
1481    
1482    #ifdef SUPPORT_PCRE16
1483  /*************************************************  /*************************************************
1484  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
1485  *************************************************/  *************************************************/
1486    
1487  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
  
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
  
 Returns:     number of characters placed in the buffer  
 */  
  
 #if !defined NOUTF8  
  
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
1488  {  {
      /* New revision (strlen16): counts the 16-bit units before the first zero
      unit; the terminating zero itself is not counted. */
1489  register int i, j;  int len = 0;
1490  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
1491    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1492  }  }
1493    #endif  /* SUPPORT_PCRE16 */
 #endif  
   
1494    
1495    
1496    #ifdef SUPPORT_PCRE16
1497  /*************************************************  /*************************************************
1498  *             Print character string             *  *           Print 16-bit character string        *
1499  *************************************************/  *************************************************/
1500    
1501  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1502  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1503    
1504  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1505  {  {
 int c = 0;  
1506  int yield = 0;  int yield = 0;
1507    
      /* New revision (pchars16): a negative length means p is zero-terminated
      and its length in 16-bit units is measured with strlen16(). */
1508    if (length < 0)
1509      length = strlen16(p);
1510    
1511  while (length-- > 0)  while (length-- > 0)
1512    {    {
1513  #if !defined NOUTF8    int c = *p++ & 0xffff;
1514    if (use_utf8)  #if !defined NOUTF
      /* In UTF mode a high surrogate (0xD800-0xDBFF) followed by a low surrogate
      is combined into one character, provided at least one more unit remains. */
1515      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1516      {      {
1517      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
      /* The low surrogate range is 0xDC00-0xDFFF inclusive. The previous test
      used d < 0xDFFF, which left out 0xDFFF, so a valid pair ending in that
      unit was printed as two separate units. */
1518        if (d >= 0xDC00 && d <= 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1519        {        {
1520        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1521        p += rc;        length--;
1522        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1523        }        }
1524      }      }
1525  #endif  #endif
      /* An unpaired surrogate, or any unit when not in UTF mode, is passed to
      pchar() as a single value. */
1526      yield += pchar(c, f);
1527      }
1528    
1529     /* Not UTF-8, or malformed UTF-8  */  return yield;
1530    }
1531    #endif  /* SUPPORT_PCRE16 */
1532    
1533    c = *p++;  
1534    if (PRINTHEX(c))  
1535      {  #ifdef SUPPORT_PCRE8
1536      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1537      yield++;  *     Read a capture name (8-bit) and check it   *
1538      }  *************************************************/
1539    else  
1540      {  static pcre_uint8 *
1541      if (f != NULL) fprintf(f, "\\x%02x", c);  read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1542      yield += 4;  {
      /* New revision (read_capture_name8): copies the run of alphanumeric bytes
      at p into the buffer at *pp and zero-terminates it. A second zero is
      written after the terminator without advancing past it (presumably an
      end-of-list marker that the next name overwrites - TODO confirm with the
      caller). */
1543      }  pcre_uint8 *npp = *pp;
1544    while (isalnum(*p)) *npp++ = *p++;
1545    *npp++ = 0;
1546    *npp = 0;
      /* A name the compiled pattern does not know is reported on outfile; the
      name is still left in the buffer. */
1547    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1548      {
1549      fprintf(outfile, "no parentheses with name \"");
1550      PCHARSV(*pp, 0, -1, outfile);
1551      fprintf(outfile, "\"\n");
1552    }    }
1553    
      /* *pp is moved to just after the first terminator; the return value is
      the input pointer advanced past the name. */
1554  return yield;  *pp = npp;
1555    return p;
1556    }
1557    #endif  /* SUPPORT_PCRE8 */
1558    
1559    
1560    
1561    #ifdef SUPPORT_PCRE16
1562    /*************************************************
1563    *     Read a capture name (16-bit) and check it  *
1564    *************************************************/
1565    
1566    /* Note that the text being read is 8-bit. */
1567    
1568    static pcre_uint8 *
1569    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1570    {
        /* Copies the run of alphanumeric bytes at p into the 16-bit buffer at *pp,
        one unit per byte, and zero-terminates it. A second zero is written after
        the terminator without advancing past it (presumably an end-of-list
        marker that the next name overwrites - TODO confirm with the caller). */
1571    pcre_uint16 *npp = *pp;
1572    while (isalnum(*p)) *npp++ = *p++;
1573    *npp++ = 0;
1574    *npp = 0;
        /* A name the compiled pattern does not know is reported on outfile; the
        name is still left in the buffer. */
1575    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1576      {
1577      fprintf(outfile, "no parentheses with name \"");
1578      PCHARSV(*pp, 0, -1, outfile);
1579      fprintf(outfile, "\"\n");
1580      }
        /* *pp is moved to just after the first terminator; the return value is
        the 8-bit input pointer advanced past the name. */
1581    *pp = npp;
1582    return p;
1583  }  }
1584    #endif  /* SUPPORT_PCRE16 */
1585    
1586    
1587    
# Line 916  if (callout_extra) Line 1610  if (callout_extra)
1610      else      else
1611        {        {
1612        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1613        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1614          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1615        fprintf(f, "\n");        fprintf(f, "\n");
1616        }        }
# Line 929  printed lengths of the substrings. */ Line 1623  printed lengths of the substrings. */
1623    
1624  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1625    
1626  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1627  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1628    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1629    
1630  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1631    
1632  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1633    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1634    
1635  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 974  first_callout = 0; Line 1668  first_callout = 0;
1668    
1669  if (cb->mark != last_callout_mark)  if (cb->mark != last_callout_mark)
1670    {    {
1671    fprintf(outfile, "Latest Mark: %s\n",    if (cb->mark == NULL)
1672      (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));      fprintf(outfile, "Latest Mark: <unset>\n");
1673      else
1674        {
1675        fprintf(outfile, "Latest Mark: ");
1676        PCHARSV(cb->mark, 0, -1, outfile);
1677        putc('\n', outfile);
1678        }
1679    last_callout_mark = cb->mark;    last_callout_mark = cb->mark;
1680    }    }
1681    
# Line 999  return (cb->callout_number != callout_fa Line 1699  return (cb->callout_number != callout_fa
1699  *************************************************/  *************************************************/
1700    
1701  /* Alternative malloc function, to test functionality and save the size of a  /* Alternative malloc function, to test functionality and save the size of a
1702  compiled re. The show_malloc variable is set only during matching. */  compiled re, which is the first store request that pcre_compile() makes. The
1703    show_malloc variable is set only during matching. */
1704    
1705  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1706  {  {
1707  void *block = malloc(size);  void *block = malloc(size);
1708  gotten_store = size;  gotten_store = size;
1709    if (first_gotten_store == 0) first_gotten_store = size;
1710  if (show_malloc)  if (show_malloc)
1711    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1712  return block;  return block;
# Line 1039  free(block); Line 1741  free(block);
1741  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1742  *************************************************/  *************************************************/
1743    
1744  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1745    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1746    value, but the code is defensive.
1747    
1748    Arguments:
1749      re        compiled regex
1750      study     study data
1751      option    PCRE_INFO_xxx option
1752      ptr       where to put the data
1753    
1754    Returns:    0 when OK, < 0 on error
1755    */
1756    
1757  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1758    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1759  {  {
1760  int rc;  int rc;
1761  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
      /* New revision: use_pcre16 selects the library to call. If support for
      the selected width was not compiled in, no library call is made and rc is
      set to PCRE_ERROR_BADMODE. Each preprocessor branch supplies exactly one
      statement for its arm of the if/else. */
1762    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1763    #ifdef SUPPORT_PCRE16
1764      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1765    #else
1766      rc = PCRE_ERROR_BADMODE;
1767    #endif
1768    else
1769    #ifdef SUPPORT_PCRE8
1770      rc = pcre_fullinfo(re, study, option, ptr);
1771    #else
1772      rc = PCRE_ERROR_BADMODE;
1773    #endif
1774    
      /* Any negative result is reported on outfile. A mode mismatch gets an
      extra line naming the running width and the opposite width. */
1775    if (rc < 0)
1776      {
1777      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1778        use_pcre16? "16" : "", option);
1779      if (rc == PCRE_ERROR_BADMODE)
1780        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1781          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1782      }
1783    
      /* The code is handed back so that callers can react to a failure. */
1784    return rc;
1785  }  }
1786    
1787    
1788    
1789  /*************************************************  /*************************************************
1790  *         Byte flipping function                 *  *             Swap byte functions                *
1791  *************************************************/  *************************************************/
1792    
1793  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1794  byteflip(unsigned long int value, int n)  value, respectively.
1795    
1796    Arguments:
1797      value        any number
1798    
1799    Returns:       the byte swapped value
1800    */
1801    
1802    static pcre_uint32
1803    swap_uint32(pcre_uint32 value)
1804  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
      /* Reverses the order of the four bytes: byte 0 goes to byte 3, byte 1 to
      byte 2, and so on. In the new revision the top byte is moved down with a
      plain shift and no mask. */
1805  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1806         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1807         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1808         ((value & 0xff000000) >> 24);         (value >> 24);
1809  }  }
1810    
1811    static pcre_uint16
1812    swap_uint16(pcre_uint16 value)
1813    {
        /* Exchanges the two bytes. Bits shifted above bit 15 by the left shift
        are dropped when the result is converted to the pcre_uint16 return type. */
1814    return (value >> 8) | (value << 8);
1815    }
1816    
1817    
1818    
1819    /*************************************************
1820    *        Flip bytes in a compiled pattern        *
1821    *************************************************/
1822    
1823    /* This function is called if the 'F' option was present on a pattern that is
1824    to be written to a file. We flip the bytes of all the integer fields in the
1825    regex data block and the study block. In 16-bit mode this also flips relevant
1826    bytes in the pattern itself. This is to make it possible to test PCRE's
1827    ability to reload byte-flipped patterns, e.g. those compiled on a different
1828    architecture. */
1829    
1830    static void
1831    regexflip(pcre *ere, pcre_extra *extra)
1832    {
        /* The name table position and length are read here, before the header
        fields they come from are byte-swapped below. */
1833    REAL_PCRE *re = (REAL_PCRE *)ere;
1834    #ifdef SUPPORT_PCRE16
1835    int op;
1836    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1837    int length = re->name_count * re->name_entry_size;
1838    #ifdef SUPPORT_UTF
1839    BOOL utf = (re->options & PCRE_UTF16) != 0;
1840    BOOL utf16_char = FALSE;
1841    #endif /* SUPPORT_UTF */
1842    #endif /* SUPPORT_PCRE16 */
1843    
1844    /* Always flip the bytes in the main data block and study blocks. */
1845    
1846    re->magic_number = REVERSED_MAGIC_NUMBER;
1847    re->size = swap_uint32(re->size);
1848    re->options = swap_uint32(re->options);
1849    re->flags = swap_uint16(re->flags);
1850    re->top_bracket = swap_uint16(re->top_bracket);
1851    re->top_backref = swap_uint16(re->top_backref);
1852    re->first_char = swap_uint16(re->first_char);
1853    re->req_char = swap_uint16(re->req_char);
1854    re->name_table_offset = swap_uint16(re->name_table_offset);
1855    re->name_entry_size = swap_uint16(re->name_entry_size);
1856    re->name_count = swap_uint16(re->name_count);
1857    
1858    if (extra != NULL)
1859      {
1860      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1861      rsd->size = swap_uint32(rsd->size);
1862      rsd->flags = swap_uint32(rsd->flags);
1863      rsd->minlength = swap_uint32(rsd->minlength);
1864      }
1865    
1866    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1867    in the name table, if present, and then in the pattern itself. */
1868    
1869    #ifdef SUPPORT_PCRE16
1870    if (!use_pcre16) return;
1871    
        /* Each pass swaps the units queued by the previous pass (the name table
        on the first pass, then the operands of the last opcode), and then reads
        and swaps the next opcode. The loop ends only at OP_END. */
1872    while(TRUE)
1873      {
1874      /* Swap previous characters. */
1875      while (length-- > 0)
1876        {
1877        *ptr = swap_uint16(*ptr);
1878        ptr++;
1879        }
1880    #ifdef SUPPORT_UTF
1881      if (utf16_char)
1882        {
          /* ptr[-1] was byte-swapped by the loop just above, so it is swapped
          back before the high-surrogate test. Testing the swapped value, as the
          code did before, almost never matches and leaves the low surrogate
          unswapped. */
1883        if ((swap_uint16(ptr[-1]) & 0xfc00) == 0xd800)
1884          {
1885          /* We know that there is only one extra character in UTF-16. */
1886          *ptr = swap_uint16(*ptr);
1887          ptr++;
1888          }
1889        }
1890      utf16_char = FALSE;
1891    #endif /* SUPPORT_UTF */
1892    
1893      /* Get next opcode. */
1894    
1895      length = 0;
1896      op = *ptr;
1897      *ptr++ = swap_uint16(op);
1898    
1899      switch (op)
1900        {
1901        case OP_END:
1902        return;
1903    
          /* For the opcodes listed below, utf16_char is set in UTF mode so that
          the next pass checks whether the last operand unit is a high surrogate
          followed by one extra unit. */
1904    #ifdef SUPPORT_UTF
1905        case OP_CHAR:
1906        case OP_CHARI:
1907        case OP_NOT:
1908        case OP_NOTI:
1909        case OP_STAR:
1910        case OP_MINSTAR:
1911        case OP_PLUS:
1912        case OP_MINPLUS:
1913        case OP_QUERY:
1914        case OP_MINQUERY:
1915        case OP_UPTO:
1916        case OP_MINUPTO:
1917        case OP_EXACT:
1918        case OP_POSSTAR:
1919        case OP_POSPLUS:
1920        case OP_POSQUERY:
1921        case OP_POSUPTO:
1922        case OP_STARI:
1923        case OP_MINSTARI:
1924        case OP_PLUSI:
1925        case OP_MINPLUSI:
1926        case OP_QUERYI:
1927        case OP_MINQUERYI:
1928        case OP_UPTOI:
1929        case OP_MINUPTOI:
1930        case OP_EXACTI:
1931        case OP_POSSTARI:
1932        case OP_POSPLUSI:
1933        case OP_POSQUERYI:
1934        case OP_POSUPTOI:
1935        case OP_NOTSTAR:
1936        case OP_NOTMINSTAR:
1937        case OP_NOTPLUS:
1938        case OP_NOTMINPLUS:
1939        case OP_NOTQUERY:
1940        case OP_NOTMINQUERY:
1941        case OP_NOTUPTO:
1942        case OP_NOTMINUPTO:
1943        case OP_NOTEXACT:
1944        case OP_NOTPOSSTAR:
1945        case OP_NOTPOSPLUS:
1946        case OP_NOTPOSQUERY:
1947        case OP_NOTPOSUPTO:
1948        case OP_NOTSTARI:
1949        case OP_NOTMINSTARI:
1950        case OP_NOTPLUSI:
1951        case OP_NOTMINPLUSI:
1952        case OP_NOTQUERYI:
1953        case OP_NOTMINQUERYI:
1954        case OP_NOTUPTOI:
1955        case OP_NOTMINUPTOI:
1956        case OP_NOTEXACTI:
1957        case OP_NOTPOSSTARI:
1958        case OP_NOTPOSPLUSI:
1959        case OP_NOTPOSQUERYI:
1960        case OP_NOTPOSUPTOI:
1961        if (utf) utf16_char = TRUE;
1962    #endif
1963        /* Fall through. */
1964    
          /* The opcode unit itself has already been swapped, hence the - 1. */
1965        default:
1966        length = OP_lengths16[op] - 1;
1967        break;
1968    
1969        case OP_CLASS:
1970        case OP_NCLASS:
1971        /* Skip the character bit map. */
1972        ptr += 32/sizeof(pcre_uint16);
1973        length = 0;
1974        break;
1975    
1976        case OP_XCLASS:
1977        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
          /* The stored size is read before its units are swapped. The opcode,
          the LINK_SIZE size units and the flags unit are handled individually
          below, so they are subtracted from the count left for the next pass. */
1978        if (LINK_SIZE > 1)
1979          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1980            - (1 + LINK_SIZE + 1));
1981        else
1982          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1983    
1984        /* Reverse the size of the XCLASS instance. */
1985        *ptr = swap_uint16(*ptr);
1986        ptr++;
1987        if (LINK_SIZE > 1)
1988          {
1989          *ptr = swap_uint16(*ptr);
1990          ptr++;
1991          }
1992    
          /* op is reused here for the unit that follows the size; it is tested
          against XCL_MAP below. */
1993        op = *ptr;
1994        *ptr = swap_uint16(op);
1995        ptr++;
1996        if ((op & XCL_MAP) != 0)
1997          {
1998          /* Skip the character bit map. */
1999          ptr += 32/sizeof(pcre_uint16);
2000          length -= 32/sizeof(pcre_uint16);
2001          }
2002        break;
2003        }
2004      }
2005    /* Control should never reach here in 16 bit mode. */
2006    #endif /* SUPPORT_PCRE16 */
2007    }
2008    
2009    
2010    
# Line 1072  return ((value & 0x000000ff) << 24) | Line 2013  return ((value & 0x000000ff) << 24) |
2013  *************************************************/  *************************************************/
2014    
2015  static int  static int
2016  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2017    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2018    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2019  {  {
# Line 1087  for (;;) Line 2028  for (;;)
2028    {    {
2029    *limit = mid;    *limit = mid;
2030    
2031    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2032      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2033    
2034    if (count == errnumber)    if (count == errnumber)
# Line 1132  Returns:    < 0, = 0, or > 0, according Line 2073  Returns:    < 0, = 0, or > 0, according
2073  */  */
2074    
2075  static int  static int
2076  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2077  {  {
2078  while (n--)  while (n--)
2079    {    {
# Line 1159  Returns:      appropriate PCRE_NEWLINE_x Line 2100  Returns:      appropriate PCRE_NEWLINE_x
2100  */  */
2101    
2102  static int  static int
2103  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2104  {  {
2105  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2106  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2107  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2108  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2109  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2110  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2111  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2112  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2113  return 0;  return 0;
2114  }  }
# Line 1183  usage(void) Line 2124  usage(void)
2124  {  {
2125  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2126  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2127  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2128  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2129  #else  #else
2130  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2131  #endif  #endif
2132  printf("\nOptions:\n");  printf("\nOptions:\n");
2133  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2134    printf("  -16      use the 16-bit library\n");
2135    #endif
2136    printf("  -b       show compiled code\n");
2137  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2138    printf("  -C arg   show a specific compile-time option\n");
2139    printf("           and exit with its value. The arg can be:\n");
2140    printf("     linksize     internal link size [2, 3, 4]\n");
2141    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2142    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2143    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2144    printf("     ucp          Unicode Properties supported [0, 1]\n");
2145    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2146    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2147  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2148  #if !defined NODFA  #if !defined NODFA
2149  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1207  printf("  -q       quiet: do not output Line 2160  printf("  -q       quiet: do not output
2160  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2161  printf("  -s       force each pattern to be studied at basic level\n"  printf("  -s       force each pattern to be studied at basic level\n"
2162         "  -s+      force each pattern to be studied, using JIT if available\n"         "  -s+      force each pattern to be studied, using JIT if available\n"
2163           "  -s++     ditto, verifying when JIT was actually used\n"
2164           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2165           "             where 1 <= n <= 7 selects JIT options\n"
2166           "  -s++n    ditto, verifying when JIT was actually used\n"
2167         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2168  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2169  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1226  options, followed by a set of test data, Line 2183  options, followed by a set of test data,
2183  int main(int argc, char **argv)  int main(int argc, char **argv)
2184  {  {
2185  FILE *infile = stdin;  FILE *infile = stdin;
2186    const char *version;
2187  int options = 0;  int options = 0;
2188  int study_options = 0;  int study_options = 0;
2189  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1246  int posix = 0; Line 2204  int posix = 0;
2204  int debug = 0;  int debug = 0;
2205  int done = 0;  int done = 0;
2206  int all_use_dfa = 0;  int all_use_dfa = 0;
2207    int verify_jit = 0;
2208  int yield = 0;  int yield = 0;
2209  int stack_size;  int stack_size;
2210    
2211  pcre_jit_stack *jit_stack = NULL;  pcre_jit_stack *jit_stack = NULL;
2212    
2213    /* These vectors store, end-to-end, a list of zero-terminated captured
2214    substring names, each list itself being terminated by an empty name. Assume
2215    that 1024 is plenty long enough for the few names we'll be testing. It is
2216    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2217    for the actual memory, to ensure alignment. */
2218    
2219    pcre_uint16 copynames[1024];
2220    pcre_uint16 getnames[1024];
2221    
2222    #ifdef SUPPORT_PCRE16
2223    pcre_uint16 *cn16ptr;
2224    pcre_uint16 *gn16ptr;
2225    #endif
2226    
2227  /* These vectors store, end-to-end, a list of captured substring names. Assume  #ifdef SUPPORT_PCRE8
2228  that 1024 is plenty long enough for the few names we'll be testing. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2229    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2230  uschar copynames[1024];  pcre_uint8 *cn8ptr;
2231  uschar getnames[1024];  pcre_uint8 *gn8ptr;
2232    #endif
 uschar *copynamesptr;  
 uschar *getnamesptr;  
   
 /* Get buffers from malloc() so that Electric Fence will check their misuse  
 when I am debugging. They grow automatically when very long lines are read. */  
2233    
2234  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2235  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2236  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2237    
2238    buffer = (pcre_uint8 *)malloc(buffer_size);
2239    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2240    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2241    
2242  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2243    
# Line 1281  it set 0x8000, but then I was advised th Line 2252  it set 0x8000, but then I was advised th
2252  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2253  #endif  #endif
2254    
2255    /* Get the version number: both pcre_version() and pcre16_version() give the
2256    same answer. We just need to ensure that we call one that is available. */
2257    
2258    #ifdef SUPPORT_PCRE8
2259    version = pcre_version();
2260    #else
2261    version = pcre16_version();
2262    #endif
2263    
2264  /* Scan options */  /* Scan options */
2265    
2266  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2267    {    {
2268    unsigned char *endptr;    pcre_uint8 *endptr;
2269      char *arg = argv[op];
2270    
2271    if (strcmp(argv[op], "-m") == 0) showstore = 1;    if (strcmp(arg, "-m") == 0) showstore = 1;
2272    else if (strcmp(argv[op], "-s") == 0) force_study = 0;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2273    else if (strcmp(argv[op], "-s+") == 0)  
2274      else if (strncmp(arg, "-s+", 3) == 0)
2275      {      {
2276        arg += 3;
2277        if (*arg == '+') { arg++; verify_jit = TRUE; }
2278      force_study = 1;      force_study = 1;
2279      force_study_options = PCRE_STUDY_JIT_COMPILE;      if (*arg == 0)
2280      }        force_study_options = jit_study_bits[6];
2281    else if (strcmp(argv[op], "-q") == 0) quiet = 1;      else if (*arg >= '1' && *arg <= '7')
2282    else if (strcmp(argv[op], "-b") == 0) debug = 1;        force_study_options = jit_study_bits[*arg - '1'];
2283    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      else goto BAD_ARG;
2284    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      }
2285    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;    else if (strcmp(arg, "-16") == 0)
2286        {
2287    #ifdef SUPPORT_PCRE16
2288        use_pcre16 = 1;
2289    #else
2290        printf("** This version of PCRE was built without 16-bit support\n");
2291        exit(1);
2292    #endif
2293        }
2294      else if (strcmp(arg, "-q") == 0) quiet = 1;
2295      else if (strcmp(arg, "-b") == 0) debug = 1;
2296      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2297      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2298      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2299  #if !defined NODFA  #if !defined NODFA
2300    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2301  #endif  #endif
2302    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2303        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2304          *endptr == 0))          *endptr == 0))
2305      {      {
2306      op++;      op++;
2307      argc--;      argc--;
2308      }      }
2309    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2310      {      {
2311      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2312      int temp;      int temp;
2313      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2314                       *endptr == 0))                       *endptr == 0))
2315        {        {
2316        timeitm = temp;        timeitm = temp;
# Line 1323  while (argc > 1 && argv[op][0] == '-') Line 2320  while (argc > 1 && argv[op][0] == '-')
2320      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2321      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2322      }      }
2323    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2324        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2325          *endptr == 0))          *endptr == 0))
2326      {      {
2327  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
# Line 1346  while (argc > 1 && argv[op][0] == '-') Line 2343  while (argc > 1 && argv[op][0] == '-')
2343  #endif  #endif
2344      }      }
2345  #if !defined NOPOSIX  #if !defined NOPOSIX
2346    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2347  #endif  #endif
2348    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2349      {      {
2350      int rc;      int rc;
2351      unsigned long int lrc;      unsigned long int lrc;
2352      printf("PCRE version %s\n", pcre_version());  
2353        if (argc > 2)
2354          {
2355          if (strcmp(argv[op + 1], "linksize") == 0)
2356            {
2357            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2358            printf("%d\n", rc);
2359            yield = rc;
2360            goto EXIT;
2361            }
2362          if (strcmp(argv[op + 1], "pcre8") == 0)
2363            {
2364    #ifdef SUPPORT_PCRE8
2365            printf("1\n");
2366            yield = 1;
2367    #else
2368            printf("0\n");
2369            yield = 0;
2370    #endif
2371            goto EXIT;
2372            }
2373          if (strcmp(argv[op + 1], "pcre16") == 0)
2374            {
2375    #ifdef SUPPORT_PCRE16
2376            printf("1\n");
2377            yield = 1;
2378    #else
2379            printf("0\n");
2380            yield = 0;
2381    #endif
2382            goto EXIT;
2383            }
2384          if (strcmp(argv[op + 1], "utf") == 0)
2385            {
2386    #ifdef SUPPORT_PCRE8
2387            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2388            printf("%d\n", rc);
2389            yield = rc;
2390    #else
2391            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2392            printf("%d\n", rc);
2393            yield = rc;
2394    #endif
2395            goto EXIT;
2396            }
2397          if (strcmp(argv[op + 1], "ucp") == 0)
2398            {
2399            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2400            printf("%d\n", rc);
2401            yield = rc;
2402            goto EXIT;
2403            }
2404          if (strcmp(argv[op + 1], "jit") == 0)
2405            {
2406            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2407            printf("%d\n", rc);
2408            yield = rc;
2409            goto EXIT;
2410            }
2411          if (strcmp(argv[op + 1], "newline") == 0)
2412            {
2413            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2414            /* Note that these values are always the ASCII values, even
2415            in EBCDIC environments. CR is 13 and NL is 10. */
2416            printf("%s\n", (rc == 13)? "CR" :
2417              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2418              (rc == -2)? "ANYCRLF" :
2419              (rc == -1)? "ANY" : "???");
2420            goto EXIT;
2421            }
2422          printf("Unknown -C option: %s\n", argv[op + 1]);
2423          goto EXIT;
2424          }
2425    
2426        printf("PCRE version %s\n", version);
2427      printf("Compiled with\n");      printf("Compiled with\n");
2428    
2429    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2430    are set, either both UTFs are supported or both are not supported. */
2431    
2432    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2433        printf("  8-bit and 16-bit support\n");
2434        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2435        if (rc)
2436          printf("  UTF-8 and UTF-16 support\n");
2437        else
2438          printf("  No UTF-8 or UTF-16 support\n");
2439    #elif defined SUPPORT_PCRE8
2440        printf("  8-bit support only\n");
2441      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2442      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2443      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2444        printf("  16-bit support only\n");
2445        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2446        printf("  %sUTF-16 support\n", rc? "" : "No ");
2447    #endif
2448    
2449        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2450      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2451      (void)pcre_config(PCRE_CONFIG_JIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2452      if (rc)      if (rc)
2453        printf("  Just-in-time compiler support\n");        {
2454          const char *arch;
2455          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2456          printf("  Just-in-time compiler support: %s\n", arch);
2457          }
2458      else      else
2459        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
2460      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2461      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
2462      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
2463      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2464        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2465        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2466        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2467      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2468      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2469                                       "all Unicode newlines");                                       "all Unicode newlines");
2470      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2471      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2472      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2473      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2474      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2475      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
2476      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2477      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
2478      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2479      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2480        if (showstore)
2481          {
2482          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2483          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2484          }
2485        printf("\n");
2486      goto EXIT;      goto EXIT;
2487      }      }
2488    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2489             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2490      {      {
2491      usage();      usage();
2492      goto EXIT;      goto EXIT;
2493      }      }
2494    else    else
2495      {      {
2496      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2497        printf("** Unknown or malformed option %s\n", arg);
2498      usage();      usage();
2499      yield = 1;      yield = 1;
2500      goto EXIT;      goto EXIT;
# Line 1440  if (argc > 2) Line 2541  if (argc > 2)
2541    
2542  /* Set alternative malloc function */  /* Set alternative malloc function */
2543    
2544    #ifdef SUPPORT_PCRE8
2545  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2546  pcre_free = new_free;  pcre_free = new_free;
2547  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2548  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2549    #endif
2550    
2551    #ifdef SUPPORT_PCRE16
2552    pcre16_malloc = new_malloc;
2553    pcre16_free = new_free;
2554    pcre16_stack_malloc = stack_malloc;
2555    pcre16_stack_free = stack_free;
2556    #endif
2557    
2558  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2559    
2560  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2561    
2562  /* Main loop */  /* Main loop */
2563    
# Line 1462  while (!done) Line 2572  while (!done)
2572  #endif  #endif
2573    
2574    const char *error;    const char *error;
2575    unsigned char *markptr;    pcre_uint8 *markptr;
2576    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
2577    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
2578    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
2579      unsigned long int get_options;
2580    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2581    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2582    int do_allcaps = 0;    int do_allcaps = 0;
# Line 1481  while (!done) Line 2592  while (!done)
2592    int do_flip = 0;    int do_flip = 0;
2593    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2594    
2595    use_utf8 = 0;    use_utf = 0;
2596    debug_lengths = 1;    debug_lengths = 1;
2597    
2598    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1496  while (!done) Line 2607  while (!done)
2607    
2608    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2609      {      {
2610      unsigned long int magic, get_options;      pcre_uint32 magic;
2611      uschar sbuf[8];      pcre_uint8 sbuf[8];
2612      FILE *f;      FILE *f;
2613    
2614      p++;      p++;
2615        if (*p == '!')
2616          {
2617          do_debug = TRUE;
2618          do_showinfo = TRUE;
2619          p++;
2620          }
2621    
2622      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2623      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2624      *pp = 0;      *pp = 0;
# Line 1512  while (!done) Line 2630  while (!done)
2630        continue;        continue;
2631        }        }
2632    
2633        first_gotten_store = 0;
2634      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2635    
2636      true_size =      true_size =
# Line 1519  while (!done) Line 2638  while (!done)
2638      true_study_size =      true_study_size =
2639        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2640    
2641      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2642      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2643    
2644      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2645    
2646      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2647      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2648        {        {
2649        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2650          {          {
2651          do_flip = 1;          do_flip = 1;
2652          }          }
# Line 1539  while (!done) Line 2658  while (!done)
2658          }          }
2659        }        }
2660    
2661        /* We hide the byte-invert info for little and big endian tests. */
2662      fprintf(outfile, "Compiled pattern%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2663        do_flip? " (byte-inverted)" : "", p);        do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
   
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
2664    
2665      /* Now see if there is any following study data. */      /* Now see if there is any following study data. */
2666    
# Line 1563  while (!done) Line 2678  while (!done)
2678          {          {
2679          FAIL_READ:          FAIL_READ:
2680          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2681          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
2682              {
2683              PCRE_FREE_STUDY(extra);
2684              }
2685          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2686          fclose(f);          fclose(f);
2687          continue;          continue;
# Line 1573  while (!done) Line 2691  while (!done)
2691        }        }
2692      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2693    
2694        /* Flip the necessary bytes. */
2695        if (do_flip)
2696          {
2697          int rc;
2698          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2699          if (rc == PCRE_ERROR_BADMODE)
2700            {
2701            /* Simulate the result of the function call below. */
2702            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2703              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2704            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2705              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2706            continue;
2707            }
2708          }
2709    
2710        /* Need to know if UTF-8 for printing data strings. */
2711    
2712        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2713        use_utf = (get_options & PCRE_UTF8) != 0;
2714    
2715      fclose(f);      fclose(f);
2716      goto SHOW_INFO;      goto SHOW_INFO;
2717      }      }
2718    
2719    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2720    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2721    
2722    delimiter = *p++;    delimiter = *p++;
2723    
# Line 1629  while (!done) Line 2768  while (!done)
2768    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2769    
2770    options = 0;    options = 0;
2771      study_options = 0;
2772    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
2773    
2774    while (*pp != 0)    while (*pp != 0)
# Line 1665  while (!done) Line 2805  while (!done)
2805  #endif  #endif
2806    
2807        case 'S':        case 'S':
2808        if (do_study == 0)        if (do_study == 0)
2809          {          {
2810          do_study = 1;          do_study = 1;
2811          if (*pp == '+')          if (*pp == '+')
2812            {            {
2813            study_options |= PCRE_STUDY_JIT_COMPILE;            if (*(++pp) == '+')
2814            pp++;              {
2815            }              verify_jit = TRUE;
2816          }              pp++;
2817                }
2818              if (*pp >= '1' && *pp <= '7')
2819                study_options |= jit_study_bits[*pp++ - '1'];
2820              else
2821                study_options |= jit_study_bits[6];
2822              }
2823            }
2824        else        else
2825          {          {
2826          do_study = 0;          do_study = 0;
# Line 1686  while (!done) Line 2833  while (!done)
2833        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2834        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2835        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2836        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2837        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2838    
2839        case 'T':        case 'T':
# Line 1720  while (!done) Line 2867  while (!done)
2867          goto SKIP_DATA;          goto SKIP_DATA;
2868          }          }
2869        locale_set = 1;        locale_set = 1;
2870        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2871        pp = ppp;        pp = ppp;
2872        break;        break;
2873    
# Line 1733  while (!done) Line 2880  while (!done)
2880    
2881        case '<':        case '<':
2882          {          {
2883          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2884            {            {
2885            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2886            pp += 3;            pp += 3;
# Line 1761  while (!done) Line 2908  while (!done)
2908    
2909    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2910    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2911    local character tables. */    local character tables. Neither does it have 16-bit support. */
2912    
2913  #if !defined NOPOSIX  #if !defined NOPOSIX
2914    if (posix || do_posix)    if (posix || do_posix)
# Line 1777  while (!done) Line 2924  while (!done)
2924      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2925      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2926    
2927        first_gotten_store = 0;
2928      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2929    
2930      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1796  while (!done) Line 2944  while (!done)
2944  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2945    
2946      {      {
2947      unsigned long int get_options;      /* In 16-bit mode, convert the input. */
2948    
2949    #ifdef SUPPORT_PCRE16
2950        if (use_pcre16)
2951          {
2952          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2953            {
2954            case -1:
2955            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2956              "converted to UTF-16\n");
2957            goto SKIP_DATA;
2958    
2959            case -2:
2960            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2961              "cannot be converted to UTF-16\n");
2962            goto SKIP_DATA;
2963    
2964            case -3: /* "Impossible error" when to16 is called with arg1 FALSE */
2965            fprintf(outfile, "**Failed: character value greater than 0xffff "
2966              "cannot be converted to 16-bit in non-UTF mode\n");
2967            goto SKIP_DATA;
2968    
2969            default:
2970            break;
2971            }
2972          p = (pcre_uint8 *)buffer16;
2973          }
2974    #endif
2975    
2976        /* Compile many times when timing */
2977    
2978      if (timeit > 0)      if (timeit > 0)
2979        {        {
# Line 1805  while (!done) Line 2982  while (!done)
2982        clock_t start_time = clock();        clock_t start_time = clock();
2983        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2984          {          {
2985          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2986          if (re != NULL) free(re);          if (re != NULL) free(re);
2987          }          }
2988        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1814  while (!done) Line 2991  while (!done)
2991            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2992        }        }
2993    
2994      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2995        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2996    
2997      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2998      if non-interactive. */      if non-interactive. */
# Line 1845  while (!done) Line 3023  while (!done)
3023      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
3024      lines. */      lines. */
3025    
3026      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3027      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
3028        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
   
     if (log_store)  
       fprintf(outfile, "Memory allocation (code space): %d\n",  
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3029    
3030      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3031      and remember the store that was got. */      and remember the store that was got. */
3032    
3033      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3034      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3035    
3036        /* Output code size information if requested */
3037    
3038        if (log_store)
3039          fprintf(outfile, "Memory allocation (code space): %d\n",
3040            (int)(first_gotten_store -
3041                  sizeof(REAL_PCRE) -
3042                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3043    
3044      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3045      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
# Line 1877  while (!done) Line 3054  while (!done)
3054          clock_t time_taken;          clock_t time_taken;
3055          clock_t start_time = clock();          clock_t start_time = clock();
3056          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3057            extra = pcre_study(re, study_options | force_study_options, &error);            {
3058              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3059              }
3060          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3061          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
3062              {
3063              PCRE_FREE_STUDY(extra);
3064              }
3065          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3066            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3067              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3068          }          }
3069        extra = pcre_study(re, study_options | force_study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3070        if (error != NULL)        if (error != NULL)
3071          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3072        else if (extra != NULL)        else if (extra != NULL)
3073            {
3074          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3075            if (log_store)
3076              {
3077              size_t jitsize;
3078              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3079                  jitsize != 0)
3080                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3081              }
3082            }
3083        }        }
3084    
3085      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1904  while (!done) Line 3095  while (!done)
3095        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
3096        }        }
3097    
3098      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
3099    
3100      SHOW_INFO:      SHOW_INFO:
3101    
3102      if (do_debug)      if (do_debug)
3103        {        {
3104        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3105        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3106        }        }
3107    
3108      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1956  while (!done) Line 3110  while (!done)
3110      if (do_showinfo)      if (do_showinfo)
3111        {        {
3112        unsigned long int all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3113        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3114          hascrorlf;          hascrorlf, maxlookbehind;
3115        int nameentrysize, namecount;        int nameentrysize, namecount;
3116        const uschar *nametable;        const pcre_uint8 *nametable;
3117    
3118        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3119        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3120        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3121        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3122        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3123        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3124        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3125        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3126        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3127        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3128        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3129              new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3130  #if !defined NOINFOCHECK            != 0)
3131        old_count = pcre_info(re, &old_options, &old_first_char);          goto SKIP_DATA;
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3132    
3133        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3134          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 2009  while (!done) Line 3143  while (!done)
3143          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3144          while (namecount-- > 0)          while (namecount-- > 0)
3145            {            {
3146            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3147              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3148              GET2(nametable, 0));  #else
3149              int imm2_size = IMM2_SIZE;
3150    #endif
3151              int length = (int)STRLEN(nametable + imm2_size);
3152              fprintf(outfile, "  ");
3153              PCHARSV(nametable, imm2_size, length, outfile);
3154              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3155    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3156              fprintf(outfile, "%3d\n", use_pcre16?
3157                 (int)(((PCRE_SPTR16)nametable)[0])
3158                :((int)nametable[0] << 8) | (int)nametable[1]);
3159              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3160    #else
3161              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3162    #ifdef SUPPORT_PCRE8
3163            nametable += nameentrysize;            nametable += nameentrysize;
3164    #else
3165              nametable += nameentrysize * 2;
3166    #endif
3167    #endif
3168            }            }
3169          }          }
3170    
3171        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3172        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3173    
3174        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3175        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3176    
3177        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3178          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 2036  while (!done) Line 3188  while (!done)
3188            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3189            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3190            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3191            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3192            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3193            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3194            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3195            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3196    
# Line 2080  while (!done) Line 3232  while (!done)
3232          }          }
3233        else        else
3234          {          {
3235          int ch = first_char & 255;          const char *caseless =
3236          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3237            "" : " (caseless)";            "" : " (caseless)";
3238          if (PRINTHEX(ch))  
3239            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3240              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3241          else          else
3242            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3243              fprintf(outfile, "First char = ");
3244              pchar(first_char, outfile);
3245              fprintf(outfile, "%s\n", caseless);
3246              }
3247          }          }
3248    
3249        if (need_char < 0)        if (need_char < 0)
# Line 2095  while (!done) Line 3252  while (!done)
3252          }          }
3253        else        else
3254          {          {
3255          int ch = need_char & 255;          const char *caseless =
3256          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3257            "" : " (caseless)";            "" : " (caseless)";
3258          if (PRINTHEX(ch))  
3259            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3260              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3261          else          else
3262            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3263              fprintf(outfile, "Need char = ");
3264              pchar(need_char, outfile);
3265              fprintf(outfile, "%s\n", caseless);
3266              }
3267          }          }
3268    
3269          if (maxlookbehind > 0)
3270            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3271    
3272        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3273        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
# Line 2118  while (!done) Line 3283  while (!done)
3283            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3284          else          else
3285            {            {
3286            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3287            int minlength;            int minlength;
3288    
3289            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3290            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3291    
3292            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3293              {              {
3294              int i;              if (start_bits == NULL)
3295              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3296              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3297                {                {
3298                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3299                  int c = 24;
3300                  fprintf(outfile, "Starting byte set: ");
3301                  for (i = 0; i < 256; i++)
3302                  {                  {
3303                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
3304                    {                    {
3305                    fprintf(outfile, "%c ", i);                    if (c > 75)
3306                    c += 2;                      {
3307                    }                      fprintf(outfile, "\n  ");
3308                  else                      c = 2;
3309                    {                      }
3310                    fprintf(outfile, "\\x%02x ", i);                    if (PRINTOK(i) && i != ' ')
3311                    c += 5;                      {
3312                        fprintf(outfile, "%c ", i);
3313                        c += 2;
3314                        }
3315                      else
3316                        {
3317                        fprintf(outfile, "\\x%02x ", i);
3318                        c += 5;
3319                        }
3320                    }                    }
3321                  }                  }
3322                  fprintf(outfile, "\n");
3323                }                }
             fprintf(outfile, "\n");  
3324              }              }
3325            }            }
3326    
3327          /* Show this only if the JIT was set by /S, not by -s. */          /* Show this only if the JIT was set by /S, not by -s. */
3328    
3329          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3330            {            {
3331            int jit;            int jit;
3332            new_info(re, extra, PCRE_INFO_JIT, &jit);            if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3333            if (jit)              {
3334              fprintf(outfile, "JIT study was successful\n");              if (jit)
3335            else                fprintf(outfile, "JIT study was successful\n");
3336  #ifdef SUPPORT_JIT              else
3337              fprintf(outfile, "JIT study was not successful\n");  #ifdef SUPPORT_JIT
3338                  fprintf(outfile, "JIT study was not successful\n");
3339  #else  #else
3340              fprintf(outfile, "JIT support is not available in this version of PCRE\n");                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3341  #endif  #endif
3342            }              }
3343              }
3344          }          }
3345        }        }
3346    
# Line 2188  while (!done) Line 3357  while (!done)
3357          }          }
3358        else        else
3359          {          {
3360          uschar sbuf[8];          pcre_uint8 sbuf[8];
3361          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3362          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3363          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3364          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3365            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3366          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3367          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3368          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3369          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3370            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3371    
3372          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3373              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2225  while (!done) Line 3395  while (!done)
3395          }          }
3396    
3397        new_free(re);        new_free(re);
3398        if (extra != NULL) pcre_free_study(extra);        if (extra != NULL)
3399            {
3400            PCRE_FREE_STUDY(extra);
3401            }
3402        if (locale_set)        if (locale_set)
3403          {          {
3404          new_free((void *)tables);          new_free((void *)tables);
# Line 2240  while (!done) Line 3413  while (!done)
3413    
3414    for (;;)    for (;;)
3415      {      {
3416      uschar *q;      pcre_uint8 *q;
3417      uschar *bptr;      pcre_uint8 *bptr;
3418      int *use_offsets = offsets;      int *use_offsets = offsets;
3419      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3420      int callout_data = 0;      int callout_data = 0;
# Line 2257  while (!done) Line 3430  while (!done)
3430      int g_notempty = 0;      int g_notempty = 0;
3431      int use_dfa = 0;      int use_dfa = 0;
3432    
     options = 0;  
   
3433      *copynames = 0;      *copynames = 0;
3434      *getnames = 0;      *getnames = 0;
3435    
3436      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3437      getnamesptr = getnames;      cn16ptr = copynames;
3438        gn16ptr = getnames;
3439    #endif
3440    #ifdef SUPPORT_PCRE8
3441        cn8ptr = copynames8;
3442        gn8ptr = getnames8;
3443    #endif
3444    
3445      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3446      first_callout = 1;      first_callout = 1;
3447      last_callout_mark = NULL;      last_callout_mark = NULL;
3448      callout_extra = 0;      callout_extra = 0;
# Line 2273  while (!done) Line 3450  while (!done)
3450      callout_fail_count = 999999;      callout_fail_count = 999999;
3451      callout_fail_id = -1;      callout_fail_id = -1;
3452      show_malloc = 0;      show_malloc = 0;
3453        options = 0;
3454    
3455      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3456        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 2308  while (!done) Line 3486  while (!done)
3486        int i = 0;        int i = 0;
3487        int n = 0;        int n = 0;
3488    
3489        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3490          In non-UTF mode, allow the value of the byte to fall through to later,
3491          where values greater than 127 are turned into UTF-8 when running in
3492          16-bit mode. */
3493    
3494          if (c != '\\')
3495            {
3496            if (use_utf)
3497              {
3498              *q++ = c;
3499              continue;
3500              }
3501            }
3502    
3503          /* Handle backslash escapes */
3504    
3505          else switch ((c = *p++))
3506          {          {
3507          case 'a': c =    7; break;          case 'a': c =    7; break;
3508          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 2324  while (!done) Line 3518  while (!done)
3518          c -= '0';          c -= '0';
3519          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3520            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3521          break;          break;
3522    
3523          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3524          if (*p == '{')          if (*p == '{')
3525            {            {
3526            unsigned char *pt = p;            pcre_uint8 *pt = p;
3527            c = 0;            c = 0;
3528            while (isxdigit(*(++pt)))  
3529              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3530              when isxdigit() is a macro that refers to its argument more than
3531              once. This is banned by the C Standard, but apparently happens in at
3532              least one MacOS environment. */
3533    
3534              for (pt++; isxdigit(*pt); pt++)
3535                {
3536                if (++i == 9)
3537                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3538                                   "using only the first eight.\n");
3539                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3540                }
3541            if (*pt == '}')            if (*pt == '}')
3542              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
3543              p = pt + 1;              p = pt + 1;
3544              break;              break;
3545              }              }
3546            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3547            }            }
 #endif  
3548    
3549          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3550            allows UTF-8 characters to be constructed byte by byte, and also allows
3551            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3552            Otherwise, pass it down to later code so that it can be turned into
3553            UTF-8 when running in 16-bit mode. */
3554    
3555          c = 0;          c = 0;
3556          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3557            {            {
3558            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3559            p++;            p++;
3560            }            }
3561            if (use_utf)
3562              {
3563              *q++ = c;
3564              continue;
3565              }
3566          break;          break;
3567    
3568          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 2412  while (!done) Line 3595  while (!done)
3595            }            }
3596          else if (isalnum(*p))          else if (isalnum(*p))
3597            {            {
3598            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3599            }            }
3600          else if (*p == '+')          else if (*p == '+')
3601            {            {
# Line 2428  while (!done) Line 3604  while (!done)
3604            }            }
3605          else if (*p == '-')          else if (*p == '-')
3606            {            {
3607            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3608            p++;            p++;
3609            }            }
3610          else if (*p == '!')          else if (*p == '!')
# Line 2482  while (!done) Line 3658  while (!done)
3658            }            }
3659          else if (isalnum(*p))          else if (isalnum(*p))
3660            {            {
3661            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)getnamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);  
           getnamesptr = npp;  
3662            }            }
3663          continue;          continue;
3664    
3665          case 'J':          case 'J':
3666          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3667          if (extra != NULL          if (extra != NULL
3668              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3669              && extra->executable_jit != NULL)              && extra->executable_jit != NULL)
3670            {            {
3671            if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);            if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3672            jit_stack = pcre_jit_stack_alloc(1, n * 1024);            jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3673            pcre_assign_jit_stack(extra, jit_callback, jit_stack);            PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3674            }            }
3675          continue;          continue;
3676    
3677          case 'L':          case 'L':
# Line 2597  while (!done) Line 3766  while (!done)
3766            }            }
3767          continue;          continue;
3768          }          }
3769        *q++ = c;  
3770          /* We now have a character value in c that may be greater than 255. In
3771          16-bit mode, we always convert characters to UTF-8 so that values greater
3772          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3773          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3774          mode must have come from \x{...} or octal constructs because values from
3775          \x.. get this far only in non-UTF mode. */
3776    
3777    #if !defined NOUTF || defined SUPPORT_PCRE16
3778          if (use_pcre16 || use_utf)
3779            {
3780            pcre_uint8 buff8[8];
3781            int ii, utn;
3782            utn = ord2utf8(c, buff8);
3783            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3784            }
3785          else
3786    #endif
3787            {
3788            if (c > 255)
3789              {
3790              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3791                "and UTF-8 mode is not enabled.\n", c);
3792              fprintf(outfile, "** Truncation will probably give the wrong "
3793                "result.\n");
3794              }
3795            *q++ = c;
3796            }
3797        }        }
3798    
3799        /* Reached end of subject string */
3800    
3801      *q = 0;      *q = 0;
3802      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
3803    
# Line 2660  while (!done) Line 3859  while (!done)
3859            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3860              {              {
3861              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3862              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3863                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3864              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3865              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
3866                {                {
3867                fprintf(outfile, "%2d+ ", (int)i);                fprintf(outfile, "%2d+ ", (int)i);
3868                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3869                  outfile);                  outfile);
3870                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3871                }                }
# Line 2674  while (!done) Line 3873  while (!done)
3873            }            }
3874          }          }
3875        free(pmatch);        free(pmatch);
3876          goto NEXT_DATA;
3877        }        }
3878    
3879    #endif  /* !defined NOPOSIX */
3880    
3881      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3882    
3883      else  #ifdef SUPPORT_PCRE16
3884  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3885          {
3886          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3887          switch(len)
3888            {
3889            case -1:
3890            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3891              "converted to UTF-16\n");
3892            goto NEXT_DATA;
3893    
3894            case -2:
3895            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3896              "cannot be converted to UTF-16\n");
3897            goto NEXT_DATA;
3898    
3899            case -3:
3900            fprintf(outfile, "**Failed: character value greater than 0xffff "
3901              "cannot be converted to 16-bit in non-UTF mode\n");
3902            goto NEXT_DATA;
3903    
3904            default:
3905            break;
3906            }
3907          bptr = (pcre_uint8 *)buffer16;
3908          }
3909    #endif
3910    
3911        /* Ensure that there is a JIT callback if we want to verify that JIT was
3912        actually used. If jit_stack == NULL, no stack has yet been assigned. */
3913    
3914        if (verify_jit && jit_stack == NULL && extra != NULL)
3915           { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3916    
3917      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3918        {        {
3919        markptr = NULL;        markptr = NULL;
3920          jit_was_used = FALSE;
3921    
3922        if (timeitm > 0)        if (timeitm > 0)
3923          {          {
# Line 2696  while (!done) Line 3930  while (!done)
3930            {            {
3931            int workspace[1000];            int workspace[1000];
3932            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3933              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,              {
3934                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3935                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3936                  (sizeof(workspace)/sizeof(int)));
3937                }
3938            }            }
3939          else          else
3940  #endif  #endif
3941    
3942          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3943            count = pcre_exec(re, extra, (char *)bptr, len,            {
3944              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3945                (options | g_notempty), use_offsets, use_size_offsets);
3946              }
3947          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3948          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3949            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2728  while (!done) Line 3965  while (!done)
3965            extra->flags = 0;            extra->flags = 0;
3966            }            }
3967          else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;          else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3968    
3969          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3970            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
3971            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
# Line 2751  while (!done) Line 3988  while (!done)
3988            }            }
3989          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3990          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3991          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3992            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3993          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3994          }          }
# Line 2763  while (!done) Line 4000  while (!done)
4000        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
4001          {          {
4002          int workspace[1000];          int workspace[1000];
4003          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4004            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
4005            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
4006          if (count == 0)          if (count == 0)
4007            {            {
4008            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2776  while (!done) Line 4013  while (!done)
4013    
4014        else        else
4015          {          {
4016          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4017            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
4018          if (count == 0)          if (count == 0)
4019            {            {
4020            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
4021            count = use_size_offsets/3;            count = use_size_offsets/3;
4022            }            }
4023          }          }
4024    
4025        /* Matched */        /* Matched */
4026    
4027        if (count >= 0)        if (count >= 0)
4028          {          {
4029          int i, maxcount;          int i, maxcount;
4030            void *cnptr, *gnptr;
4031    
4032  #if !defined NODFA  #if !defined NODFA
4033          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2816  while (!done) Line 4054  while (!done)
4054    
4055          if (do_allcaps)          if (do_allcaps)
4056            {            {
4057            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4058                goto SKIP_DATA;
4059            count++;   /* Allow for full match */            count++;   /* Allow for full match */
4060            if (count * 2 > use_size_offsets) count = use_size_offsets/2;            if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4061            }            }
# Line 2838  while (!done) Line 4077  while (!done)
4077            else            else
4078              {              {
4079              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4080              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4081                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4082                if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4083              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4084              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
4085                {                {
4086                fprintf(outfile, "%2d+ ", i/2);                fprintf(outfile, "%2d+ ", i/2);
4087                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4088                  outfile);                  outfile);
4089                fprintf(outfile, "\n");                fprintf(outfile, "\n");
4090                }                }
4091              }              }
4092            }            }
4093    
4094          if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);          if (markptr != NULL)
4095              {
4096              fprintf(outfile, "MK: ");
4097              PCHARSV(markptr, 0, -1, outfile);
4098              fprintf(outfile, "\n");
4099              }
4100    
4101          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4102            {            {
4103            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
4104              {              {
4105                int rc;
4106              char copybuffer[256];              char copybuffer[256];
4107              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4108                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
4109              if (rc < 0)              if (rc < 0)
4110                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4111              else              else
4112                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
4113                  fprintf(outfile, "%2dC ", i);
4114                  PCHARSV(copybuffer, 0, rc, outfile);
4115                  fprintf(outfile, " (%d)\n", rc);
4116                  }
4117              }              }
4118            }            }
4119    
4120          for (copynamesptr = copynames;          cnptr = copynames;
4121               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
4122            {            {
4123              int rc;
4124            char copybuffer[256];            char copybuffer[256];
4125            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
4126              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
4127                {
4128                if (*(pcre_uint16 *)cnptr == 0) break;
4129                }
4130              else
4131                {
4132                if (*(pcre_uint8 *)cnptr == 0) break;
4133                }
4134    
4135              PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4136                cnptr, copybuffer, sizeof(copybuffer));
4137    
4138            if (rc < 0)            if (rc < 0)
4139              fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);              {
4140                fprintf(outfile, "copy substring ");
4141                PCHARSV(cnptr, 0, -1, outfile);
4142                fprintf(outfile, " failed %d\n", rc);
4143                }
4144            else            else
4145              fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);              {
4146                fprintf(outfile, "  C ");
4147                PCHARSV(copybuffer, 0, rc, outfile);
4148                fprintf(outfile, " (%d) ", rc);
4149                PCHARSV(cnptr, 0, -1, outfile);
4150                putc('\n', outfile);
4151                }
4152    
4153              cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4154            }            }
4155    
4156          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4157            {            {
4158            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
4159              {              {
4160                int rc;
4161              const char *substring;              const char *substring;
4162              int rc = pcre_get_substring((char *)bptr, use_offsets, count,              PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
               i, &substring);  
4163              if (rc < 0)              if (rc < 0)
4164                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
4165              else              else
4166                {                {
4167                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG ", i);
4168                pcre_free_substring(substring);                PCHARSV(substring, 0, rc, outfile);
4169                  fprintf(outfile, " (%d)\n", rc);
4170                  PCRE_FREE_SUBSTRING(substring);
4171                }                }
4172              }              }
4173            }            }
4174    
4175          for (getnamesptr = getnames;          gnptr = getnames;
4176               *getnamesptr != 0;          for (;;)
              getnamesptr += (int)strlen((char*)getnamesptr) + 1)  
4177            {            {
4178              int rc;
4179            const char *substring;            const char *substring;
4180            int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,  
4181              count, (char *)getnamesptr, &substring);            if (use_pcre16)
4182                {
4183                if (*(pcre_uint16 *)gnptr == 0) break;
4184                }
4185              else
4186                {
4187                if (*(pcre_uint8 *)gnptr == 0) break;
4188                }
4189    
4190              PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4191                gnptr, &substring);
4192            if (rc < 0)            if (rc < 0)
4193              fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);              {
4194                fprintf(outfile, "get substring ");
4195                PCHARSV(gnptr, 0, -1, outfile);
4196                fprintf(outfile, " failed %d\n", rc);
4197                }
4198            else            else
4199              {              {
4200              fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);              fprintf(outfile, "  G ");
4201              pcre_free_substring(substring);              PCHARSV(substring, 0, rc, outfile);
4202                fprintf(outfile, " (%d) ", rc);
4203                PCHARSV(gnptr, 0, -1, outfile);
4204                PCRE_FREE_SUBSTRING(substring);
4205                putc('\n', outfile);
4206              }              }
4207    
4208              gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4209            }            }
4210    
4211          if (getlist)          if (getlist)
4212            {            {
4213              int rc;
4214            const char **stringlist;            const char **stringlist;
4215            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,            PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
             &stringlist);  
4216            if (rc < 0)            if (rc < 0)
4217              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
4218            else            else
4219              {              {
4220              for (i = 0; i < count; i++)              for (i = 0; i < count; i++)
4221                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                {
4222                  fprintf(outfile, "%2dL ", i);
4223                  PCHARSV(stringlist[i], 0, -1, outfile);
4224                  putc('\n', outfile);
4225                  }
4226              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
4227                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
4228              pcre_free_substring_list(stringlist);              PCRE_FREE_SUBSTRING_LIST(stringlist);
4229              }              }
4230            }            }
4231          }          }
# Line 2936  while (!done) Line 4235  while (!done)
4235        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
4236          {          {
4237          if (markptr == NULL) fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
4238            else fprintf(outfile, "Partial match, mark=%s", markptr);          else
4239              {
4240              fprintf(outfile, "Partial match, mark=");
4241              PCHARSV(markptr, 0, -1, outfile);
4242              }
4243          if (use_size_offsets > 1)          if (use_size_offsets > 1)
4244            {            {
4245            fprintf(outfile, ": ");            fprintf(outfile, ": ");
4246            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4247              outfile);              outfile);
4248            }            }
4249            if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4250          fprintf(outfile, "\n");          fprintf(outfile, "\n");
4251          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
4252          }          }
# Line 2957  while (!done) Line 4261  while (!done)
4261        terminated by CRLF, an advance of one character just passes the \r,        terminated by CRLF, an advance of one character just passes the \r,
4262        whereas we should prefer the longer newline sequence, as does the code in        whereas we should prefer the longer newline sequence, as does the code in
4263        pcre_exec(). Fudge the offset value to achieve this. We check for a        pcre_exec(). Fudge the offset value to achieve this. We check for a
4264        newline setting in the pattern; if none was set, use pcre_config() to        newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4265        find the default.        find the default.
4266    
4267        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
# Line 2968  while (!done) Line 4272  while (!done)
4272          if (g_notempty != 0)          if (g_notempty != 0)
4273            {            {
4274            int onechar = 1;            int onechar = 1;
4275            unsigned int obits = ((real_pcre *)re)->options;            unsigned int obits = ((REAL_PCRE *)re)->options;
4276            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
4277            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
4278              {              {
4279              int d;              int d;
4280              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4281              /* Note that these values are always the ASCII ones, even in              /* Note that these values are always the ASCII ones, even in
4282              EBCDIC environments. CR = 13, NL = 10. */              EBCDIC environments. CR = 13, NL = 10. */
4283              obits = (d == 13)? PCRE_NEWLINE_CR :              obits = (d == 13)? PCRE_NEWLINE_CR :
# Line 2987  while (!done) Line 4291  while (!done)
4291                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4292                &&                &&
4293                start_offset < len - 1 &&                start_offset < len - 1 &&
4294                bptr[start_offset] == '\r' &&  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4295                bptr[start_offset+1] == '\n')                (use_pcre16?
4296                       ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4297                    && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4298                  :
4299                       bptr[start_offset] == '\r'
4300                    && bptr[start_offset + 1] == '\n')
4301    #elif defined SUPPORT_PCRE16
4302                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4303                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4304    #else
4305                     bptr[start_offset] == '\r'
4306                  && bptr[start_offset + 1] == '\n'
4307    #endif
4308                  )
4309              onechar++;              onechar++;
4310            else if (use_utf8)            else if (use_utf)
4311              {              {
4312              while (start_offset + onechar < len)              while (start_offset + onechar < len)
4313                {                {
# Line 3007  while (!done) Line 4324  while (!done)
4324              case PCRE_ERROR_NOMATCH:              case PCRE_ERROR_NOMATCH:
4325              if (gmatched == 0)              if (gmatched == 0)
4326                {                {
4327                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL)
4328                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  {
4329                    fprintf(outfile, "No match");
4330                    }
4331                  else
4332                    {
4333                    fprintf(outfile, "No match, mark = ");
4334                    PCHARSV(markptr, 0, -1, outfile);
4335                    }
4336                  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4337                  putc('\n', outfile);
4338                }                }
4339              break;              break;
4340    
4341              case PCRE_ERROR_BADUTF8:              case PCRE_ERROR_BADUTF8:
4342              case PCRE_ERROR_SHORTUTF8:              case PCRE_ERROR_SHORTUTF8:
4343              fprintf(outfile, "Error %d (%s UTF-8 string)", count,              fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4344                (count == PCRE_ERROR_BADUTF8)? "bad" : "short");                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4345                  use_pcre16? "16" : "8");
4346              if (use_size_offsets >= 2)              if (use_size_offsets >= 2)
4347                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4348                  use_offsets[1]);                  use_offsets[1]);
4349              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4350              break;              break;
4351    
4352                case PCRE_ERROR_BADUTF8_OFFSET:
4353                fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4354                  use_pcre16? "16" : "8");
4355                break;
4356    
4357              default:              default:
4358              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))              if (count < 0 &&
4359                    (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4360                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4361              else              else
4362                fprintf(outfile, "Error %d (Unexpected value)\n", count);                fprintf(outfile, "Error %d (Unexpected value)\n", count);
# Line 3061  while (!done) Line 4394  while (!done)
4394    
4395        else        else
4396          {          {
4397          bptr += use_offsets[1];          bptr += use_offsets[1] * CHAR_SIZE;
4398          len -= use_offsets[1];          len -= use_offsets[1];
4399          }          }
4400        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
# Line 3076  while (!done) Line 4409  while (!done)
4409  #endif  #endif
4410    
4411    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
4412    if (extra != NULL) pcre_free_study(extra);    if (extra != NULL)
4413        {
4414        PCRE_FREE_STUDY(extra);
4415        }
4416    if (locale_set)    if (locale_set)
4417      {      {
4418      new_free((void *)tables);      new_free((void *)tables);
4419      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
4420      locale_set = 0;      locale_set = 0;
4421      }      }
4422    if (jit_stack != NULL)    if (jit_stack != NULL)
4423      {      {
4424      pcre_jit_stack_free(jit_stack);      PCRE_JIT_STACK_FREE(jit_stack);
4425      jit_stack = NULL;      jit_stack = NULL;
4426      }      }
4427    }    }
4428    
4429  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 3102  free(dbuffer); Line 4438  free(dbuffer);
4438  free(pbuffer);  free(pbuffer);
4439  free(offsets);  free(offsets);
4440    
4441    #ifdef SUPPORT_PCRE16
4442    if (buffer16 != NULL) free(buffer16);
4443    #endif
4444    
4445  return yield;  return yield;
4446  }  }
4447    

Legend:
Removed from v.689  
changed lines
  Added in v.936

  ViewVC Help
Powered by ViewVC 1.1.5