/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 1087 by chpe, Tue Oct 16 15:55:38 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    #ifdef HAVE_CONFIG_H
50    #include "config.h"
51    #endif
52    
53  #include <ctype.h>  #include <ctype.h>
54  #include <stdio.h>  #include <stdio.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 58  POSSIBILITY OF SUCH DAMAGE.
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68    #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71    #include <readline/readline.h>
72    #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80    #endif
81    
82    /* A number of things vary for Windows builds. Originally, pcretest opened its
83    input and output without "b"; then I was told that "b" was needed in some
84    environments, so it was added for release 5.0 to both the input and output. (It
85    makes no difference on Unix-like systems.) Later I was told that it is wrong
86    for the input on Windows. I've now abstracted the modes into two macros that
87    are set here, to make it easier to fiddle with them, and removed "b" from the
88    input mode under Windows. */
89    
90    #if defined(_WIN32) || defined(WIN32)
91    #include <io.h>                /* For _setmode() */
92    #include <fcntl.h>             /* For _O_BINARY */
93    #define INPUT_MODE   "r"
94    #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112  /* We include pcre_internal.h because we need the internal info for displaying  #else
113  the results of pcre_study() and we also need to know about the internal  #include <sys/time.h>          /* These two includes are needed */
114  macros, structures, and other internal data values; pcretest has "inside  #include <sys/resource.h>      /* for setrlimit(). */
115  information" compared to a program that strictly follows the PCRE API. */  #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119    #define INPUT_MODE   "rb"
120    #define OUTPUT_MODE  "wb"
121    #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126    /* We have to include pcre_internal.h because we need the internal info for
127    displaying the results of pcre_study() and we also need to know about the
128    internal macros, structures, and other internal data values; pcretest has
129    "inside information" compared to a program that strictly follows the PCRE API.
130    
131    Although pcre_internal.h does itself include pcre.h, we explicitly include it
132    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133    appropriately for an application, not for building PCRE. */
134    
135    #include "pcre.h"
136    
137    #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138    /* Configure internal macros to 32 bit mode. */
139    #define COMPILE_PCRE32
140    #endif
141    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142    /* Configure internal macros to 16 bit mode. */
143    #define COMPILE_PCRE16
144    #endif
145    #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146    /* Configure internal macros to 8 bit mode. */
147    #define COMPILE_PCRE8
148    #endif
149    
150  #include "pcre_internal.h"  #include "pcre_internal.h"
151    
152  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
153  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
154  symbols to prevent clashes. */  8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155    when that is compiled in debug mode. */
156  #define _pcre_utf8_table1      utf8_table1  
157  #define _pcre_utf8_table1_size utf8_table1_size  #ifdef SUPPORT_PCRE8
158  #define _pcre_utf8_table2      utf8_table2  void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159  #define _pcre_utf8_table3      utf8_table3  #endif
160  #define _pcre_utf8_table4      utf8_table4  #ifdef SUPPORT_PCRE16
161  #define _pcre_utt              utt  void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162  #define _pcre_utt_size         utt_size  #endif
163  #define _pcre_OP_lengths       OP_lengths  #ifdef SUPPORT_PCRE32
164    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165    #endif
166    
167    /* We need access to some of the data tables that PCRE uses. So as not to have
168    to keep two copies, we include the source files here, changing the names of the
169    external symbols to prevent clashes. */
170    
171    #define PCRE_INCLUDED
172    
173  #include "pcre_tables.c"  #include "pcre_tables.c"
174    #include "pcre_ucd.c"
175    
176    /* The definition of the macro PRINTABLE, which determines whether to print an
177    output character as-is or as a hex value when showing compiled patterns, is
178    the same as in the printint.src file. We use it here in cases when the locale
179    has not been explicitly changed, so as to get consistent output from systems
180    that differ in their output from isprint() even in the "C" locale. */
181    
182  /* We also need the pcre_printint() function for printing out compiled  #ifdef EBCDIC
183  patterns. This function is in a separate file so that it can be included in  #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
184  pcre_compile.c when that module is compiled with debugging enabled. */  #else
185    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
186    #endif
187    
188  #include "pcre_printint.src"  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189    
190    /* Posix support is disabled in 16 or 32 bit only mode. */
191    #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192    #define NOPOSIX
193    #endif
194    
195  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
196  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 83  Makefile. */ Line 200  Makefile. */
200  #include "pcreposix.h"  #include "pcreposix.h"
201  #endif  #endif
202    
203  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, originally for the benefit of a version that was
204  build pcretest without support for UTF8 (define NOUTF8), without the interface  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
206  function (define NOINFOCHECK). */  automatically cut out the UTF support if PCRE is built without it. */
207    
208    #ifndef SUPPORT_UTF
209    #ifndef NOUTF
210    #define NOUTF
211    #endif
212    #endif
213    
214    /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
216    only from one place and is handled differently). I couldn't dream up any way of
217    using a single macro to do this in a generic way, because of the many different
218    argument requirements. We know that at least one of SUPPORT_PCRE8,
219    SUPPORT_PCRE16 and SUPPORT_PCRE32 must be set. First define macros for each
220    individual mode; then use these in the definitions of generic macros.
221    
222    **** Special note about the PCHARSxxx macros: the address of the string to be
223    printed is always given as two arguments: a base address followed by an offset.
224    The base address is cast to the correct data size for 8, 16 or 32 bit data; the
225    offset is in units of this size. If the string were given as base+offset in one
226    argument, the casting might be incorrectly applied. */
227    
228    #ifdef SUPPORT_PCRE8
229    
230    #define PCHARS8(lv, p, offset, len, f) \
231      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
232    
233    #define PCHARSV8(p, offset, len, f) \
234      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235    
236    #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237      p = read_capture_name8(p, cn8, re)
238    
239    #define STRLEN8(p) ((int)strlen((char *)p))
240    
241    #define SET_PCRE_CALLOUT8(callout) \
242      pcre_callout = callout
243    
244    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245       pcre_assign_jit_stack(extra, callback, userdata)
246    
247    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248      re = pcre_compile((char *)pat, options, error, erroffset, tables)
249    
250    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251        namesptr, cbuffer, size) \
252      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253        (char *)namesptr, cbuffer, size)
254    
255    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257    
258    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259        offsets, size_offsets, workspace, size_workspace) \
260      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261        offsets, size_offsets, workspace, size_workspace)
262    
263    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264        offsets, size_offsets) \
265      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266        offsets, size_offsets)
267    
268    #define PCRE_FREE_STUDY8(extra) \
269      pcre_free_study(extra)
270    
271    #define PCRE_FREE_SUBSTRING8(substring) \
272      pcre_free_substring(substring)
273    
274    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275      pcre_free_substring_list(listptr)
276    
277    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278        getnamesptr, subsptr) \
279      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280        (char *)getnamesptr, subsptr)
281    
282    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
283      n = pcre_get_stringnumber(re, (char *)ptr) /* NOTE(review): parameter 'rc' is unused; 're' is taken from the expansion site, not from the argument list - confirm intended */
284    
285    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
287    
288    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290    
291    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293    
294    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295      pcre_printint(re, outfile, debug_lengths)
296    
297    #define PCRE_STUDY8(extra, re, options, error) \
298      extra = pcre_study(re, options, error)
299    
300    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301      pcre_jit_stack_alloc(startsize, maxsize)
302    
303    #define PCRE_JIT_STACK_FREE8(stack) \
304      pcre_jit_stack_free(stack)
305    
306    #endif /* SUPPORT_PCRE8 */
307    
308    /* -----------------------------------------------------------*/
309    
310    #ifdef SUPPORT_PCRE16
311    
312    #define PCHARS16(lv, p, offset, len, f) \
313      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314    
315    #define PCHARSV16(p, offset, len, f) \
316      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317    
318    #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319      p = read_capture_name16(p, cn16, re)
320    
321    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322    
323    #define SET_PCRE_CALLOUT16(callout) \
324      pcre16_callout = (int (*)(pcre16_callout_block *))callout
325    
326    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327      pcre16_assign_jit_stack((pcre16_extra *)extra, \
328        (pcre16_jit_callback)callback, userdata)
329    
330    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332        tables)
333    
334    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335        namesptr, cbuffer, size) \
336      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338    
339    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341        (PCRE_UCHAR16 *)cbuffer, size/2)
342    
343    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344        offsets, size_offsets, workspace, size_workspace) \
345      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347        workspace, size_workspace)
348    
349    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350        offsets, size_offsets) \
351      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352        len, start_offset, options, offsets, size_offsets)
353    
354    #define PCRE_FREE_STUDY16(extra) \
355      pcre16_free_study((pcre16_extra *)extra)
356    
357    #define PCRE_FREE_SUBSTRING16(substring) \
358      pcre16_free_substring((PCRE_SPTR16)substring)
359    
360    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362    
363    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364        getnamesptr, subsptr) \
365      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367    
368    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr) /* NOTE(review): parameter 'rc' is unused; 're' is taken from the expansion site and, unlike the other 16-bit macros here, is not cast to (pcre16 *) - confirm intended */
370    
371    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373        (PCRE_SPTR16 *)(void*)subsptr)
374    
375    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377        (PCRE_SPTR16 **)(void*)listptr)
378    
379    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381        tables)
382    
383    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384      pcre16_printint(re, outfile, debug_lengths)
385    
386    #define PCRE_STUDY16(extra, re, options, error) \
387      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388    
389    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391    
392    #define PCRE_JIT_STACK_FREE16(stack) \
393      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394    
395    #endif /* SUPPORT_PCRE16 */
396    
397    /* -----------------------------------------------------------*/
398    
399    #ifdef SUPPORT_PCRE32
400    
401    #define PCHARS32(lv, p, offset, len, f) \
402      lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
403    
404    #define PCHARSV32(p, offset, len, f) \
405      (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
406    
407    #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408      p = read_capture_name32(p, cn32, re)
409    
410    #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411    
412    #define SET_PCRE_CALLOUT32(callout) \
413      pcre32_callout = (int (*)(pcre32_callout_block *))callout
414    
415    #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416      pcre32_assign_jit_stack((pcre32_extra *)extra, \
417        (pcre32_jit_callback)callback, userdata)
418    
419    #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420      re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421        tables)
422    
423    #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424        namesptr, cbuffer, size) \
425      rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426        count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4) /* 32-bit wrapper for copying a named substring; 'size' is treated as a byte count (as in the 8-bit macro) and converted to 4-byte code units. Was size/2, copied from the 16-bit macro, which overstated the buffer capacity. */
427    
428    #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429      rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430        (PCRE_UCHAR32 *)cbuffer, (size)/4) /* 32-bit wrapper for copying a numbered substring; 'size' is treated as a byte count (as in the 8-bit macro) and converted to 4-byte code units. Was size/2, copied from the 16-bit macro, which overstated the buffer capacity. */
431    
432    #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433        offsets, size_offsets, workspace, size_workspace) \
434      count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435        (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436        workspace, size_workspace)
437    
438    #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439        offsets, size_offsets) \
440      count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441        len, start_offset, options, offsets, size_offsets)
442    
443    #define PCRE_FREE_STUDY32(extra) \
444      pcre32_free_study((pcre32_extra *)extra)
445    
446    #define PCRE_FREE_SUBSTRING32(substring) \
447      pcre32_free_substring((PCRE_SPTR32)substring)
448    
449    #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450      pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451    
452    #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453        getnamesptr, subsptr) \
454      rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455        count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456    
457    #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458      n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr) /* NOTE(review): parameter 'rc' is unused; 're' is taken from the expansion site and, unlike the other 32-bit macros here, is not cast to (pcre32 *) - confirm intended */
459    
460    #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461      rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462        (PCRE_SPTR32 *)(void*)subsptr)
463    
464    #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465      rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466        (PCRE_SPTR32 **)(void*)listptr)
467    
468    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469      rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470        tables)
471    
472    #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473      pcre32_printint(re, outfile, debug_lengths)
474    
475    #define PCRE_STUDY32(extra, re, options, error) \
476      extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477    
478    #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479      (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480    
481    #define PCRE_JIT_STACK_FREE32(stack) \
482      pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483    
484    #endif /* SUPPORT_PCRE32 */
485    
486    
487    /* ----- At least two modes are supported; a runtime test is needed, except for
488    pcre_config(), and the JIT stack functions, when it doesn't matter which
489    version is called. ----- */
490    
491    enum {
492      PCRE8_MODE,
493      PCRE16_MODE,
494      PCRE32_MODE
495    };
496    
497    #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498    
499    #define CHAR_SIZE (1 << pcre_mode)
500    
501    #define PCHARS(lv, p, offset, len, f) \
502      if (pcre_mode == PCRE32_MODE) \
503        PCHARS32(lv, p, offset, len, f); \
504      else if (pcre_mode == PCRE16_MODE) \
505        PCHARS16(lv, p, offset, len, f); \
506      else \
507        PCHARS8(lv, p, offset, len, f)
508    
509    #define PCHARSV(p, offset, len, f) \
510      if (pcre_mode == PCRE32_MODE) \
511        PCHARSV32(p, offset, len, f); \
512      else if (pcre_mode == PCRE16_MODE) \
513        PCHARSV16(p, offset, len, f); \
514      else \
515        PCHARSV8(p, offset, len, f)
516    
517    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518      if (pcre_mode == PCRE32_MODE) \
519        READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520      else if (pcre_mode == PCRE16_MODE) \
521        READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522      else \
523        READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524    
525    #define SET_PCRE_CALLOUT(callout) \
526      if (pcre_mode == PCRE32_MODE) \
527        SET_PCRE_CALLOUT32(callout); \
528      else if (pcre_mode == PCRE16_MODE) \
529        SET_PCRE_CALLOUT16(callout); \
530      else \
531        SET_PCRE_CALLOUT8(callout)
532    
533    #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534    
535    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536      if (pcre_mode == PCRE32_MODE) \
537        PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538      else if (pcre_mode == PCRE16_MODE) \
539        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540      else \
541        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542    
543    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544      if (pcre_mode == PCRE32_MODE) \
545        PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546      else if (pcre_mode == PCRE16_MODE) \
547        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548      else \
549        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550    
551    #define PCRE_CONFIG pcre_config
552    
553    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554        namesptr, cbuffer, size) \
555      if (pcre_mode == PCRE32_MODE) \
556        PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557          namesptr, cbuffer, size); \
558      else if (pcre_mode == PCRE16_MODE) \
559        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560          namesptr, cbuffer, size); \
561      else \
562        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563          namesptr, cbuffer, size)
564    
565    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566      if (pcre_mode == PCRE32_MODE) \
567        PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568      else if (pcre_mode == PCRE16_MODE) \
569        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570      else \
571        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572    
573    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574        offsets, size_offsets, workspace, size_workspace) \
575      if (pcre_mode == PCRE32_MODE) \
576        PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577          offsets, size_offsets, workspace, size_workspace); \
578      else if (pcre_mode == PCRE16_MODE) \
579        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580          offsets, size_offsets, workspace, size_workspace); \
581      else \
582        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583          offsets, size_offsets, workspace, size_workspace)
584    
585    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586        offsets, size_offsets) \
587      if (pcre_mode == PCRE32_MODE) \
588        PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589          offsets, size_offsets); \
590      else if (pcre_mode == PCRE16_MODE) \
591        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592          offsets, size_offsets); \
593      else \
594        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595          offsets, size_offsets)
596    
597    #define PCRE_FREE_STUDY(extra) \
598      if (pcre_mode == PCRE32_MODE) \
599        PCRE_FREE_STUDY32(extra); \
600      else if (pcre_mode == PCRE16_MODE) \
601        PCRE_FREE_STUDY16(extra); \
602      else \
603        PCRE_FREE_STUDY8(extra)
604    
605    #define PCRE_FREE_SUBSTRING(substring) \
606      if (pcre_mode == PCRE32_MODE) \
607        PCRE_FREE_SUBSTRING32(substring); \
608      else if (pcre_mode == PCRE16_MODE) \
609        PCRE_FREE_SUBSTRING16(substring); \
610      else \
611        PCRE_FREE_SUBSTRING8(substring)
612    
613    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614      if (pcre_mode == PCRE32_MODE) \
615        PCRE_FREE_SUBSTRING_LIST32(listptr); \
616      else if (pcre_mode == PCRE16_MODE) \
617        PCRE_FREE_SUBSTRING_LIST16(listptr); \
618      else \
619        PCRE_FREE_SUBSTRING_LIST8(listptr)
620    
621    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622        getnamesptr, subsptr) \
623      if (pcre_mode == PCRE32_MODE) \
624        PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625          getnamesptr, subsptr); \
626      else if (pcre_mode == PCRE16_MODE) \
627        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628          getnamesptr, subsptr); \
629      else \
630        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631          getnamesptr, subsptr)
632    
633    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634      if (pcre_mode == PCRE32_MODE) \
635        PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636      else if (pcre_mode == PCRE16_MODE) \
637        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638      else \
639        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640    
641    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642      if (pcre_mode == PCRE32_MODE) \
643        PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644      else if (pcre_mode == PCRE16_MODE) \
645        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646      else \
647        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648    
649    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650      if (pcre_mode == PCRE32_MODE) \
651        PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652      else if (pcre_mode == PCRE16_MODE) \
653        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654      else \
655        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656    
657    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658      (pcre_mode == PCRE32_MODE ? \
659         PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660        : pcre_mode == PCRE16_MODE ? \
661          PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662          : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663    
664    #define PCRE_JIT_STACK_FREE(stack) \
665      if (pcre_mode == PCRE32_MODE) \
666        PCRE_JIT_STACK_FREE32(stack); \
667      else if (pcre_mode == PCRE16_MODE) \
668        PCRE_JIT_STACK_FREE16(stack); \
669      else \
670        PCRE_JIT_STACK_FREE8(stack)
671    
672    #define PCRE_MAKETABLES \
673      (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674    
675    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676      if (pcre_mode == PCRE32_MODE) \
677        PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678      else if (pcre_mode == PCRE16_MODE) \
679        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680      else \
681        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682    
683    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684      if (pcre_mode == PCRE32_MODE) \
685        PCRE_PRINTINT32(re, outfile, debug_lengths); \
686      else if (pcre_mode == PCRE16_MODE) \
687        PCRE_PRINTINT16(re, outfile, debug_lengths); \
688      else \
689        PCRE_PRINTINT8(re, outfile, debug_lengths)
690    
691    #define PCRE_STUDY(extra, re, options, error) \
692      if (pcre_mode == PCRE32_MODE) \
693        PCRE_STUDY32(extra, re, options, error); \
694      else if (pcre_mode == PCRE16_MODE) \
695        PCRE_STUDY16(extra, re, options, error); \
696      else \
697        PCRE_STUDY8(extra, re, options, error)
698    
699    /* ----- Only 8-bit mode is supported ----- */
700    
701    #elif defined SUPPORT_PCRE8
702    #define CHAR_SIZE                 1
703    #define PCHARS                    PCHARS8
704    #define PCHARSV                   PCHARSV8
705    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
706    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
707    #define STRLEN                    STRLEN8
708    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
709    #define PCRE_COMPILE              PCRE_COMPILE8
710    #define PCRE_CONFIG               pcre_config
711    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
713    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
714    #define PCRE_EXEC                 PCRE_EXEC8
715    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
716    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
717    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
718    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
719    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
720    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
721    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
722    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
723    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
724    #define PCRE_MAKETABLES           pcre_maketables()
725    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726    #define PCRE_PRINTINT             PCRE_PRINTINT8
727    #define PCRE_STUDY                PCRE_STUDY8
728    
729    /* ----- Only 16-bit mode is supported ----- */
730    
731    #elif defined SUPPORT_PCRE16
732    #define CHAR_SIZE                 2
733    #define PCHARS                    PCHARS16
734    #define PCHARSV                   PCHARSV16
735    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
736    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
737    #define STRLEN                    STRLEN16
738    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
739    #define PCRE_COMPILE              PCRE_COMPILE16
740    #define PCRE_CONFIG               pcre16_config
741    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
743    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
744    #define PCRE_EXEC                 PCRE_EXEC16
745    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
746    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
747    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
748    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
749    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
750    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
751    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
752    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
753    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
754    #define PCRE_MAKETABLES           pcre16_maketables()
755    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756    #define PCRE_PRINTINT             PCRE_PRINTINT16
757    #define PCRE_STUDY                PCRE_STUDY16
758    
759    /* ----- Only 32-bit mode is supported ----- */
760    
761    #elif defined SUPPORT_PCRE32
762    #define CHAR_SIZE                 4
763    #define PCHARS                    PCHARS32
764    #define PCHARSV                   PCHARSV32
765    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
766    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
767    #define STRLEN                    STRLEN32
768    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
769    #define PCRE_COMPILE              PCRE_COMPILE32
770    #define PCRE_CONFIG               pcre32_config
771    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
773    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
774    #define PCRE_EXEC                 PCRE_EXEC32
775    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
776    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
777    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
778    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
779    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
780    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
781    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
782    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
783    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
784    #define PCRE_MAKETABLES           pcre32_maketables()
785    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786    #define PCRE_PRINTINT             PCRE_PRINTINT32
787    #define PCRE_STUDY                PCRE_STUDY32
788    
789    #endif
790    
791    /* ----- End of mode-specific function call macros ----- */
792    
793    
794  /* Other parameters */  /* Other parameters */
# Line 99  function (define NOINFOCHECK). */ Line 801  function (define NOINFOCHECK). */
801  #endif  #endif
802  #endif  #endif
803    
804  #define LOOPREPEAT 500000  #if !defined NODFA
805    #define DFA_WS_DIMENSION 1000
806    #endif
807    
808  #define BUFFER_SIZE 30000  /* This is the default loop count for timing. */
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
809    
810    #define LOOPREPEAT 500000
811    
812  /* Static variables */  /* Static variables */
813    
# Line 114  static int callout_count; Line 817  static int callout_count;
817  static int callout_extra;  static int callout_extra;
818  static int callout_fail_count;  static int callout_fail_count;
819  static int callout_fail_id;  static int callout_fail_id;
820    static int debug_lengths;
821  static int first_callout;  static int first_callout;
822    static int jit_was_used;
823    static int locale_set = 0;
824  static int show_malloc;  static int show_malloc;
825  static int use_utf8;  static int use_utf;
826  static size_t gotten_store;  static size_t gotten_store;
827    static size_t first_gotten_store = 0;
828    static const unsigned char *last_callout_mark = NULL;
829    
830    /* The buffers grow automatically if very long input lines are encountered. */
831    
832    static int buffer_size = 50000;
833    static pcre_uint8 *buffer = NULL;
834    static pcre_uint8 *dbuffer = NULL;
835    static pcre_uint8 *pbuffer = NULL;
836    
837    /* Another buffer is needed for translation to 16/32-bit character strings. It
838    will be obtained and extended as required. */
839    
840  static uschar *pbuffer = NULL;  #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
841    
842    /* We need the table of operator lengths that is used for 16/32-bit compiling, in
843    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
844    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
845    appropriately for the 16/32-bit world. Just as a safety check, make sure that
846    COMPILE_PCRE[16|32] is *not* set. */
847    
848    #ifdef COMPILE_PCRE16
849    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
850    #endif
851    
852    #ifdef COMPILE_PCRE32
853    #error COMPILE_PCRE32 must not be set when compiling pcretest.c
854    #endif
855    
856    #if LINK_SIZE == 2
857    #undef LINK_SIZE
858    #define LINK_SIZE 1
859    #elif LINK_SIZE == 3 || LINK_SIZE == 4
860    #undef LINK_SIZE
861    #define LINK_SIZE 2
862    #else
863    #error LINK_SIZE must be either 2, 3, or 4
864    #endif
865    
866    #undef IMM2_SIZE
867    #define IMM2_SIZE 1
868    
869    #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
870    
871    #ifdef SUPPORT_PCRE16
872    static int buffer16_size = 0;
873    static pcre_uint16 *buffer16 = NULL;
874    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
875    #endif  /* SUPPORT_PCRE16 */
876    
877    #ifdef SUPPORT_PCRE32
878    static int buffer32_size = 0;
879    static pcre_uint32 *buffer32 = NULL;
880    static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
881    #endif  /* SUPPORT_PCRE32 */
882    
883    /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
884    support, the mode can be changed by an option. If there is no 8-bit support,
885    there must be 16- or 32-bit support, so default to 16-bit mode, or else 32-bit. */
886    
887    #if defined SUPPORT_PCRE8
888    static int pcre_mode = PCRE8_MODE;
889    #elif defined SUPPORT_PCRE16
890    static int pcre_mode = PCRE16_MODE;
891    #elif defined SUPPORT_PCRE32
892    static int pcre_mode = PCRE32_MODE;
893    #endif
894    
895    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
896    
897    static int jit_study_bits[] =
898      {
899      PCRE_STUDY_JIT_COMPILE,
900      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
901      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
902      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
903      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
904      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
905      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
906        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
907    };
908    
909    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
910      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
911    
912    /* Textual explanations for runtime error codes */
913    
914    static const char *errtexts[] = {
915      NULL,  /* 0 is no error */
916      NULL,  /* NOMATCH is handled specially */
917      "NULL argument passed",
918      "bad option value",
919      "magic number missing",
920      "unknown opcode - pattern overwritten?",
921      "no more memory",
922      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
923      "match limit exceeded",
924      "callout error code",
925      NULL,  /* BADUTF8/16 is handled specially */
926      NULL,  /* BADUTF8/16 offset is handled specially */
927      NULL,  /* PARTIAL is handled specially */
928      "not used - internal error",
929      "internal error - pattern overwritten?",
930      "bad count value",
931      "item unsupported for DFA matching",
932      "backreference condition or recursion test not supported for DFA matching",
933      "match limit not supported for DFA matching",
934      "workspace size exceeded in DFA matching",
935      "too much recursion for DFA matching",
936      "recursion limit exceeded",
937      "not used - internal error",
938      "invalid combination of newline options",
939      "bad offset value",
940      NULL,  /* SHORTUTF8/16 is handled specially */
941      "nested recursion at the same subject position",
942      "JIT stack limit reached",
943      "pattern compiled in wrong mode: 8-bit/16-bit error",
944      "pattern compiled with other endianness",
945      "invalid data in workspace for DFA restart"
946    };
947    
948    
949  /*************************************************  /*************************************************
950  *          Read number from string               *  *         Alternate character tables             *
951  *************************************************/  *************************************************/
952    
953  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
954  around with conditional compilation, just do the job by hand. It is only used  using the default tables of the library. However, the T option can be used to
955  for unpicking the -o argument, so just keep it simple.  select alternate sets of tables, for different kinds of testing. Note also that
956    the L (locale) option also adjusts the tables. */
957    
958    /* This is the set of tables distributed as default with PCRE. It recognizes
959    only ASCII characters. */
960    
961    static const pcre_uint8 tables0[] = {
962    
963    /* This table is a lower casing table. */
964    
965        0,  1,  2,  3,  4,  5,  6,  7,
966        8,  9, 10, 11, 12, 13, 14, 15,
967       16, 17, 18, 19, 20, 21, 22, 23,
968       24, 25, 26, 27, 28, 29, 30, 31,
969       32, 33, 34, 35, 36, 37, 38, 39,
970       40, 41, 42, 43, 44, 45, 46, 47,
971       48, 49, 50, 51, 52, 53, 54, 55,
972       56, 57, 58, 59, 60, 61, 62, 63,
973       64, 97, 98, 99,100,101,102,103,
974      104,105,106,107,108,109,110,111,
975      112,113,114,115,116,117,118,119,
976      120,121,122, 91, 92, 93, 94, 95,
977       96, 97, 98, 99,100,101,102,103,
978      104,105,106,107,108,109,110,111,
979      112,113,114,115,116,117,118,119,
980      120,121,122,123,124,125,126,127,
981      128,129,130,131,132,133,134,135,
982      136,137,138,139,140,141,142,143,
983      144,145,146,147,148,149,150,151,
984      152,153,154,155,156,157,158,159,
985      160,161,162,163,164,165,166,167,
986      168,169,170,171,172,173,174,175,
987      176,177,178,179,180,181,182,183,
988      184,185,186,187,188,189,190,191,
989      192,193,194,195,196,197,198,199,
990      200,201,202,203,204,205,206,207,
991      208,209,210,211,212,213,214,215,
992      216,217,218,219,220,221,222,223,
993      224,225,226,227,228,229,230,231,
994      232,233,234,235,236,237,238,239,
995      240,241,242,243,244,245,246,247,
996      248,249,250,251,252,253,254,255,
997    
998    /* This table is a case flipping table. */
999    
1000        0,  1,  2,  3,  4,  5,  6,  7,
1001        8,  9, 10, 11, 12, 13, 14, 15,
1002       16, 17, 18, 19, 20, 21, 22, 23,
1003       24, 25, 26, 27, 28, 29, 30, 31,
1004       32, 33, 34, 35, 36, 37, 38, 39,
1005       40, 41, 42, 43, 44, 45, 46, 47,
1006       48, 49, 50, 51, 52, 53, 54, 55,
1007       56, 57, 58, 59, 60, 61, 62, 63,
1008       64, 97, 98, 99,100,101,102,103,
1009      104,105,106,107,108,109,110,111,
1010      112,113,114,115,116,117,118,119,
1011      120,121,122, 91, 92, 93, 94, 95,
1012       96, 65, 66, 67, 68, 69, 70, 71,
1013       72, 73, 74, 75, 76, 77, 78, 79,
1014       80, 81, 82, 83, 84, 85, 86, 87,
1015       88, 89, 90,123,124,125,126,127,
1016      128,129,130,131,132,133,134,135,
1017      136,137,138,139,140,141,142,143,
1018      144,145,146,147,148,149,150,151,
1019      152,153,154,155,156,157,158,159,
1020      160,161,162,163,164,165,166,167,
1021      168,169,170,171,172,173,174,175,
1022      176,177,178,179,180,181,182,183,
1023      184,185,186,187,188,189,190,191,
1024      192,193,194,195,196,197,198,199,
1025      200,201,202,203,204,205,206,207,
1026      208,209,210,211,212,213,214,215,
1027      216,217,218,219,220,221,222,223,
1028      224,225,226,227,228,229,230,231,
1029      232,233,234,235,236,237,238,239,
1030      240,241,242,243,244,245,246,247,
1031      248,249,250,251,252,253,254,255,
1032    
1033    /* This table contains bit maps for various character classes. Each map is 32
1034    bytes long and the bits run from the least significant end of each byte. The
1035    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1036    graph, print, punct, and cntrl. Other classes are built from combinations. */
1037    
1038      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1039      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1040      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1041      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1042    
1043      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1044      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1045      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1046      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1047    
1048      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1049      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1050      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1051      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1052    
1053      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1054      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1055      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1056      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1057    
1058      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1059      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1060      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1061      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1062    
1063      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1064      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1065      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1066      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1067    
1068      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1069      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1070      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1071      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1072    
1073      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1074      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1075      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1076      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1077    
1078      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1079      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1080      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1081      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1082    
1083      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1084      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1085      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1086      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1087    
1088    /* This table identifies various classes of character by individual bits:
1089      0x01   white space character
1090      0x02   letter
1091      0x04   decimal digit
1092      0x08   hexadecimal digit
1093      0x10   alphanumeric or '_'
1094      0x80   regular expression metacharacter or binary zero
1095    */
1096    
1097  Arguments:    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
1098    str           string to be converted    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
1099    endptr        where to put the end pointer    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
1100      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
1101      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
1102      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
1103      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
1104      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
1105      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
1106      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
1107      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
1108      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
1109      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
1110      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
1111      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
1112      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
1113      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1114      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1115      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1116      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1117      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1118      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1119      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1120      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1121      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1122      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1123      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1124      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1125      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1126      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1127      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1128      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1129    
1130    /* This is a set of tables that came originally from a Windows user. It seems to
1131    be at least an approximation of ISO 8859. In particular, there are characters
1132    greater than 128 that are marked as spaces, letters, etc. */
1133    
1134    static const pcre_uint8 tables1[] = {
1135    0,1,2,3,4,5,6,7,
1136    8,9,10,11,12,13,14,15,
1137    16,17,18,19,20,21,22,23,
1138    24,25,26,27,28,29,30,31,
1139    32,33,34,35,36,37,38,39,
1140    40,41,42,43,44,45,46,47,
1141    48,49,50,51,52,53,54,55,
1142    56,57,58,59,60,61,62,63,
1143    64,97,98,99,100,101,102,103,
1144    104,105,106,107,108,109,110,111,
1145    112,113,114,115,116,117,118,119,
1146    120,121,122,91,92,93,94,95,
1147    96,97,98,99,100,101,102,103,
1148    104,105,106,107,108,109,110,111,
1149    112,113,114,115,116,117,118,119,
1150    120,121,122,123,124,125,126,127,
1151    128,129,130,131,132,133,134,135,
1152    136,137,138,139,140,141,142,143,
1153    144,145,146,147,148,149,150,151,
1154    152,153,154,155,156,157,158,159,
1155    160,161,162,163,164,165,166,167,
1156    168,169,170,171,172,173,174,175,
1157    176,177,178,179,180,181,182,183,
1158    184,185,186,187,188,189,190,191,
1159    224,225,226,227,228,229,230,231,
1160    232,233,234,235,236,237,238,239,
1161    240,241,242,243,244,245,246,215,
1162    248,249,250,251,252,253,254,223,
1163    224,225,226,227,228,229,230,231,
1164    232,233,234,235,236,237,238,239,
1165    240,241,242,243,244,245,246,247,
1166    248,249,250,251,252,253,254,255,
1167    0,1,2,3,4,5,6,7,
1168    8,9,10,11,12,13,14,15,
1169    16,17,18,19,20,21,22,23,
1170    24,25,26,27,28,29,30,31,
1171    32,33,34,35,36,37,38,39,
1172    40,41,42,43,44,45,46,47,
1173    48,49,50,51,52,53,54,55,
1174    56,57,58,59,60,61,62,63,
1175    64,97,98,99,100,101,102,103,
1176    104,105,106,107,108,109,110,111,
1177    112,113,114,115,116,117,118,119,
1178    120,121,122,91,92,93,94,95,
1179    96,65,66,67,68,69,70,71,
1180    72,73,74,75,76,77,78,79,
1181    80,81,82,83,84,85,86,87,
1182    88,89,90,123,124,125,126,127,
1183    128,129,130,131,132,133,134,135,
1184    136,137,138,139,140,141,142,143,
1185    144,145,146,147,148,149,150,151,
1186    152,153,154,155,156,157,158,159,
1187    160,161,162,163,164,165,166,167,
1188    168,169,170,171,172,173,174,175,
1189    176,177,178,179,180,181,182,183,
1190    184,185,186,187,188,189,190,191,
1191    224,225,226,227,228,229,230,231,
1192    232,233,234,235,236,237,238,239,
1193    240,241,242,243,244,245,246,215,
1194    248,249,250,251,252,253,254,223,
1195    192,193,194,195,196,197,198,199,
1196    200,201,202,203,204,205,206,207,
1197    208,209,210,211,212,213,214,247,
1198    216,217,218,219,220,221,222,255,
1199    0,62,0,0,1,0,0,0,
1200    0,0,0,0,0,0,0,0,
1201    32,0,0,0,1,0,0,0,
1202    0,0,0,0,0,0,0,0,
1203    0,0,0,0,0,0,255,3,
1204    126,0,0,0,126,0,0,0,
1205    0,0,0,0,0,0,0,0,
1206    0,0,0,0,0,0,0,0,
1207    0,0,0,0,0,0,255,3,
1208    0,0,0,0,0,0,0,0,
1209    0,0,0,0,0,0,12,2,
1210    0,0,0,0,0,0,0,0,
1211    0,0,0,0,0,0,0,0,
1212    254,255,255,7,0,0,0,0,
1213    0,0,0,0,0,0,0,0,
1214    255,255,127,127,0,0,0,0,
1215    0,0,0,0,0,0,0,0,
1216    0,0,0,0,254,255,255,7,
1217    0,0,0,0,0,4,32,4,
1218    0,0,0,128,255,255,127,255,
1219    0,0,0,0,0,0,255,3,
1220    254,255,255,135,254,255,255,7,
1221    0,0,0,0,0,4,44,6,
1222    255,255,127,255,255,255,127,255,
1223    0,0,0,0,254,255,255,255,
1224    255,255,255,255,255,255,255,127,
1225    0,0,0,0,254,255,255,255,
1226    255,255,255,255,255,255,255,255,
1227    0,2,0,0,255,255,255,255,
1228    255,255,255,255,255,255,255,127,
1229    0,0,0,0,255,255,255,255,
1230    255,255,255,255,255,255,255,255,
1231    0,0,0,0,254,255,0,252,
1232    1,0,0,248,1,0,0,120,
1233    0,0,0,0,254,255,255,255,
1234    0,0,128,0,0,0,128,0,
1235    255,255,255,255,0,0,0,0,
1236    0,0,0,0,0,0,0,128,
1237    255,255,255,255,0,0,0,0,
1238    0,0,0,0,0,0,0,0,
1239    128,0,0,0,0,0,0,0,
1240    0,1,1,0,1,1,0,0,
1241    0,0,0,0,0,0,0,0,
1242    0,0,0,0,0,0,0,0,
1243    1,0,0,0,128,0,0,0,
1244    128,128,128,128,0,0,128,0,
1245    28,28,28,28,28,28,28,28,
1246    28,28,0,0,0,0,0,128,
1247    0,26,26,26,26,26,26,18,
1248    18,18,18,18,18,18,18,18,
1249    18,18,18,18,18,18,18,18,
1250    18,18,18,128,128,0,128,16,
1251    0,26,26,26,26,26,26,18,
1252    18,18,18,18,18,18,18,18,
1253    18,18,18,18,18,18,18,18,
1254    18,18,18,128,128,0,0,0,
1255    0,0,0,0,0,1,0,0,
1256    0,0,0,0,0,0,0,0,
1257    0,0,0,0,0,0,0,0,
1258    0,0,0,0,0,0,0,0,
1259    1,0,0,0,0,0,0,0,
1260    0,0,18,0,0,0,0,0,
1261    0,0,20,20,0,18,0,0,
1262    0,20,18,0,0,0,0,0,
1263    18,18,18,18,18,18,18,18,
1264    18,18,18,18,18,18,18,18,
1265    18,18,18,18,18,18,18,0,
1266    18,18,18,18,18,18,18,18,
1267    18,18,18,18,18,18,18,18,
1268    18,18,18,18,18,18,18,18,
1269    18,18,18,18,18,18,18,0,
1270    18,18,18,18,18,18,18,18
1271    };
1272    
1273  Returns:        the unsigned long  
1274    
1275    
1276    #ifndef HAVE_STRERROR
1277    /*************************************************
1278    *     Provide strerror() for non-ANSI libraries  *
1279    *************************************************/
1280    
1281    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1282    in their libraries, but can provide the same facility by this simple
1283    alternative function. */
1284    
1285    extern int   sys_nerr;
1286    extern char *sys_errlist[];
1287    
1288    char *
1289    strerror(int n)
1290    {
1291    if (n < 0 || n >= sys_nerr) return "unknown error number";
1292    return sys_errlist[n];
1293    }
1294    #endif /* HAVE_STRERROR */
1295    
1296    
1297    
1298    /*************************************************
1299    *       Print newline configuration              *
1300    *************************************************/
1301    
1302    /*
1303    Arguments:
1304      rc         the return code from PCRE_CONFIG_NEWLINE
1305      isc        TRUE if called from "-C newline"
1306    Returns:     nothing
1307  */  */
1308    
1309  static int  static void
1310  get_value(unsigned char *str, unsigned char **endptr)  print_newline_config(int rc, BOOL isc)
1311  {  {
1312  int result = 0;  const char *s = NULL;
1313  while(*str != 0 && isspace(*str)) str++;  if (!isc) printf("  Newline sequence is ");
1314  while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');  switch(rc)
1315  *endptr = str;    {
1316  return(result);    case CHAR_CR: s = "CR"; break;
1317      case CHAR_LF: s = "LF"; break;
1318      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1319      case -1: s = "ANY"; break;
1320      case -2: s = "ANYCRLF"; break;
1321    
1322      default:
1323      printf("a non-standard value: 0x%04x\n", rc);
1324      return;
1325      }
1326    
1327    printf("%s\n", s);
1328  }  }
1329    
1330    
1331    
1332    /*************************************************
1333    *         JIT memory callback                    *
1334    *************************************************/
1335    
1336    static pcre_jit_stack* jit_callback(void *arg)
1337    {
1338    jit_was_used = TRUE;
1339    return (pcre_jit_stack *)arg;
1340    }
1341    
1342    
1343    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1344  /*************************************************  /*************************************************
1345  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
1346  *************************************************/  *************************************************/
# Line 159  return(result); Line 1349  return(result);
1349  and returns the value of the character.  and returns the value of the character.
1350    
1351  Argument:  Argument:
1352    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
1353    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
1354    
1355  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
1356             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
1357  */  */
1358    
 #if !defined NOUTF8  
   
1359  static int  static int
1360  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1361  {  {
1362  int c = *buffer++;  pcre_uint32 c = *utf8bytes++;
1363  int d = c;  pcre_uint32 d = c;
1364  int i, j, s;  int i, j, s;
1365    
1366  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
# Line 191  d = (c & utf8_table3[i]) << s; Line 1379  d = (c & utf8_table3[i]) << s;
1379    
1380  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
1381    {    {
1382    c = *buffer++;    c = *utf8bytes++;
1383    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
1384    s -= 6;    s -= 6;
1385    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 208  if (j != i) return -(i+1); Line 1396  if (j != i) return -(i+1);
1396  *vptr = d;  *vptr = d;
1397  return i+1;  return i+1;
1398  }  }
1399    #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
 #endif  
1400    
1401    
1402    
1403    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1404  /*************************************************  /*************************************************
1405  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
1406  *************************************************/  *************************************************/
# Line 222  and encodes it as a UTF-8 character in 0 Line 1410  and encodes it as a UTF-8 character in 0
1410    
1411  Arguments:  Arguments:
1412    cvalue     the character value    cvalue     the character value
1413    buffer     pointer to buffer for result - at least 6 bytes long    utf8bytes  pointer to buffer for result - at least 6 bytes long
1414    
1415  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
1416  */  */
1417    
1418  static int  static int
1419  ord2utf8(int cvalue, uschar *buffer)  ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1420  {  {
1421  register int i, j;  register int i, j;
1422    if (cvalue > 0x7fffffffu)
1423      return -1;
1424  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
1425    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
1426  buffer += i;  utf8bytes += i;
1427  for (j = i; j > 0; j--)  for (j = i; j > 0; j--)
1428   {   {
1429   *buffer-- = 0x80 | (cvalue & 0x3f);   *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1430   cvalue >>= 6;   cvalue >>= 6;
1431   }   }
1432  *buffer = utf8_table2[i] | cvalue;  *utf8bytes = utf8_table2[i] | cvalue;
1433  return i + 1;  return i + 1;
1434  }  }
1435    #endif
1436    
1437    
1438    #ifdef SUPPORT_PCRE16
1439    /*************************************************
1440    *         Convert a string to 16-bit             *
1441    *************************************************/
1442    
1443    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1444    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1445    double, because a value up to 0xffff uses 1 to 3 bytes in UTF-8 and exactly 2
1446    bytes in UTF-16, while higher values use 4 bytes in both encodings. The
1447    result is always left in buffer16.
1448    
1449    Note that this function does not object to surrogate values. This is
1450    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1451    for the purpose of testing that they are correctly faulted.
1452    
1453    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1454    in UTF-8 so that values greater than 255 can be handled.
1455    
1456    Arguments:
1457      data       TRUE if converting a data line; FALSE for a regex
1458      p          points to a byte string
1459      utf        true if UTF-8 (to be converted to UTF-16)
1460      len        number of bytes in the string (excluding trailing zero)
1461    
1462    Returns:     number of 16-bit data items used (excluding trailing zero)
1463                 OR -1 if a UTF-8 string is malformed
1464                 OR -2 if a value > 0x10ffff is encountered
1465                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1466    */
1467    
1468    static int
1469    to16(int data, pcre_uint8 *p, int utf, int len)
1470    {
1471    pcre_uint16 *pp;
1472    
1473    if (buffer16_size < 2*len + 2)
1474      {
1475      if (buffer16 != NULL) free(buffer16);
1476      buffer16_size = 2*len + 2;
1477      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1478      if (buffer16 == NULL)
1479        {
1480        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1481        exit(1);
1482        }
1483      }
1484    
1485    pp = buffer16;
1486    
1487    if (!utf && !data)
1488      {
1489      while (len-- > 0) *pp++ = *p++;
1490      }
1491    
1492    else
1493      {
1494      pcre_uint32 c = 0;
1495      while (len > 0)
1496        {
1497        int chlen = utf82ord(p, &c);
1498        if (chlen <= 0) return -1;
1499        if (c > 0x10ffff) return -2;
1500        p += chlen;
1501        len -= chlen;
1502        if (c < 0x10000) *pp++ = c; else
1503          {
1504          if (!utf) return -3;
1505          c -= 0x10000;
1506          *pp++ = 0xD800 | (c >> 10);
1507          *pp++ = 0xDC00 | (c & 0x3ff);
1508          }
1509        }
1510      }
1511    
1512    *pp = 0;
1513    return pp - buffer16;
1514    }
1515    #endif
1516    
1517    #ifdef SUPPORT_PCRE32
1518    /*************************************************
1519    *         Convert a string to 32-bit             *
1520    *************************************************/
1521    
1522    /* The space needed for a 32-bit string is never more than four times the
1523    8-bit size. When the bytes are copied one-for-one (a non-UTF pattern) it is
1524    exactly four times. When the input is decoded as UTF-8, each sequence of 1
1525    to 4 bytes (or more) yields a single 32-bit unit, so the output has no more
1526    units than the input has bytes. The result is always left in buffer32.
1527    
1528    Note that this function does not object to surrogate values. This is
1529    deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1530    for the purpose of testing that they are correctly faulted.
1531    
1532    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1533    in UTF-8 so that values greater than 255 can be handled.
1534    
1535    Arguments:
1536      data       TRUE if converting a data line; FALSE for a regex
1537      p          points to a byte string
1538      utf        true if UTF-8 (to be converted to UTF-32)
1539      len        number of bytes in the string (excluding trailing zero)
1540    
1541    Returns:     number of 32-bit data items used (excluding trailing zero)
1542                 OR -1 if a UTF-8 string is malformed
1543                 OR -2 if a value > 0x10ffff is encountered
1544                 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1545    */
1546    
1547    static int
1548    to32(int data, pcre_uint8 *p, int utf, int len)
1549    {
1550    pcre_uint32 *pp;
1551    
1552    if (buffer32_size < 4*len + 4)
1553      {
1554      if (buffer32 != NULL) free(buffer32);
1555      buffer32_size = 4*len + 4;
1556      buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1557      if (buffer32 == NULL)
1558        {
1559        fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1560        exit(1);
1561        }
1562      }
1563    
1564    pp = buffer32;
1565    
1566    if (!utf && !data)
1567      {
1568      while (len-- > 0) *pp++ = *p++;
1569      }
1570    
1571    else
1572      {
1573      pcre_uint32 c = 0;
1574      while (len > 0)
1575        {
1576        int chlen = utf82ord(p, &c);
1577        if (chlen <= 0) return -1;
1578        if (utf)
1579          {
1580          if (c > 0x10ffff) return -2;
1581          if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1582          }
1583    
1584        p += chlen;
1585        len -= chlen;
1586        *pp++ = c;
1587        }
1588      }
1589    
1590    *pp = 0;
1591    return pp - buffer32;
1592    }
1593    #endif
1594    
1595  /*************************************************  /*************************************************
1596  *             Print character string             *  *        Read or extend an input line            *
1597  *************************************************/  *************************************************/
1598    
1599  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Input lines are read into buffer, but both patterns and data lines can be
1600  mode. Yields number of characters printed. If handed a NULL file, just counts  continued over multiple input lines. In addition, if the buffer fills up, we
1601  chars without printing. */  want to automatically expand it so as to be able to handle extremely large
1602    lines that are needed for certain stress tests. When the input buffer is
1603    expanded, the other two buffers must also be expanded likewise, and the
1604    contents of pbuffer, which are a copy of the input for callouts, must be
1605    preserved (for when expansion happens for a data line). This is not the most
1606    optimal way of handling this, but hey, this is just a test program!
1607    
1608    Arguments:
1609      f            the file to read
1610      start        where in buffer to start (this *must* be within buffer)
1611      prompt       for stdin or readline()
1612    
1613    Returns:       pointer to the start of new data
1614                   could be a copy of start, or could be moved
1615                   NULL if no data read and EOF reached
1616    */
1617    
1618  static int pchars(unsigned char *p, int length, FILE *f)  static pcre_uint8 *
1619    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1620  {  {
1621  int c = 0;  pcre_uint8 *here = start;
1622    
1623    for (;;)
1624      {
1625      size_t rlen = (size_t)(buffer_size - (here - buffer));
1626    
1627      if (rlen > 1000)
1628        {
1629        int dlen;
1630    
1631        /* If libreadline or libedit support is required, use readline() to read a
1632        line if the input is a terminal. Note that readline() removes the trailing
1633        newline, so we must put it back again, to be compatible with fgets(). */
1634    
1635    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1636        if (isatty(fileno(f)))
1637          {
1638          size_t len;
1639          char *s = readline(prompt);
1640          if (s == NULL) return (here == start)? NULL : start;
1641          len = strlen(s);
1642          if (len > 0) add_history(s);
1643          if (len > rlen - 1) len = rlen - 1;
1644          memcpy(here, s, len);
1645          here[len] = '\n';
1646          here[len+1] = 0;
1647          free(s);
1648          }
1649        else
1650    #endif
1651    
1652        /* Read the next line by normal means, prompting if the file is stdin. */
1653    
1654          {
1655          if (f == stdin) printf("%s", prompt);
1656          if (fgets((char *)here, rlen,  f) == NULL)
1657            return (here == start)? NULL : start;
1658          }
1659    
1660        dlen = (int)strlen((char *)here);
1661        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1662        here += dlen;
1663        }
1664    
1665      else
1666        {
1667        int new_buffer_size = 2*buffer_size;
1668        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1669        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1670        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1671    
1672        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1673          {
1674          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1675          exit(1);
1676          }
1677    
1678        memcpy(new_buffer, buffer, buffer_size);
1679        memcpy(new_pbuffer, pbuffer, buffer_size);
1680    
1681        buffer_size = new_buffer_size;
1682    
1683        start = new_buffer + (start - buffer);
1684        here = new_buffer + (here - buffer);
1685    
1686        free(buffer);
1687        free(dbuffer);
1688        free(pbuffer);
1689    
1690        buffer = new_buffer;
1691        dbuffer = new_dbuffer;
1692        pbuffer = new_pbuffer;
1693        }
1694      }
1695    
1696    return NULL;  /* Control never gets here */
1697    }
1698    
1699    
1700    
1701    /*************************************************
1702    *          Read number from string               *
1703    *************************************************/
1704    
1705    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1706    around with conditional compilation, just do the job by hand. It is only used
1707    for unpicking arguments, so just keep it simple.
1708    
1709    Arguments:
1710      str           string to be converted
1711      endptr        where to put the end pointer
1712    
1713    Returns:        the value, as an int
1714    */
1715    
1716    static int
1717    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1718    {
1719    int result = 0;
1720    while(*str != 0 && isspace(*str)) str++;
1721    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1722    *endptr = str;
1723    return(result);
1724    }
1725    
1726    
1727    
1728    /*************************************************
1729    *             Print one character                *
1730    *************************************************/
1731    
1732    /* Print a single character either literally, or as a hex escape. */
1733    
1734    static int pchar(pcre_uint32 c, FILE *f)
1735    {
1736    int n;
1737    if (PRINTOK(c))
1738      {
1739      if (f != NULL) fprintf(f, "%c", c);
1740      return 1;
1741      }
1742    
1743    if (c < 0x100)
1744      {
1745      if (use_utf)
1746        {
1747        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1748        return 6;
1749        }
1750      else
1751        {
1752        if (f != NULL) fprintf(f, "\\x%02x", c);
1753        return 4;
1754        }
1755      }
1756    
1757    if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1758    return n >= 0 ? n : 0;
1759    }
1760    
1761    
1762    
1763    #ifdef SUPPORT_PCRE8
1764    /*************************************************
1765    *         Print 8-bit character string           *
1766    *************************************************/
1767    
1768    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1769    If handed a NULL file, just counts chars without printing. */
1770    
1771    static int pchars(pcre_uint8 *p, int length, FILE *f)
1772    {
1773    pcre_uint32 c = 0;
1774  int yield = 0;  int yield = 0;
1775    
1776    if (length < 0)
1777      length = strlen((char *)p);
1778    
1779  while (length-- > 0)  while (length-- > 0)
1780    {    {
1781  #if !defined NOUTF8  #if !defined NOUTF
1782    if (use_utf8)    if (use_utf)
1783      {      {
1784      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1785      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1786        {        {
1787        length -= rc - 1;        length -= rc - 1;
1788        p += rc;        p += rc;
1789        if (c < 256 && isprint(c))        yield += pchar(c, f);
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n;  
         if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);  
         yield += n;  
         }  
1790        continue;        continue;
1791        }        }
1792      }      }
1793  #endif  #endif
1794      c = *p++;
1795      yield += pchar(c, f);
1796      }
1797    
1798    return yield;
1799    }
1800    #endif
1801    
    /* Not UTF-8, or malformed UTF-8  */  
1802    
1803    if (isprint(c = *(p++)))  
1804      {  #ifdef SUPPORT_PCRE16
1805      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1806      yield++;  *    Find length of 0-terminated 16-bit string   *
1807      }  *************************************************/
1808    else  
1809    static int strlen16(PCRE_SPTR16 p)
1810    {
1811    int len = 0;
1812    while (*p++ != 0) len++;
1813    return len;
1814    }
1815    #endif  /* SUPPORT_PCRE16 */
1816    
1817    
1818    
1819    #ifdef SUPPORT_PCRE32
1820    /*************************************************
1821    *    Find length of 0-terminated 32-bit string   *
1822    *************************************************/
1823    
1824    static int strlen32(PCRE_SPTR32 p)
1825    {
1826    int len = 0;
1827    while (*p++ != 0) len++;
1828    return len;
1829    }
1830    #endif  /* SUPPORT_PCRE32 */
1831    
1832    
1833    
1834    #ifdef SUPPORT_PCRE16
1835    /*************************************************
1836    *           Print 16-bit character string        *
1837    *************************************************/
1838    
1839    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1840    If handed a NULL file, just counts chars without printing. */
1841    
1842    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1843    {
1844    int yield = 0;
1845    
1846    if (length < 0)
1847      length = strlen16(p);
1848    
1849    while (length-- > 0)
1850      {
1851      pcre_uint32 c = *p++ & 0xffff;
1852    #if !defined NOUTF
1853      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1854      {      {
1855      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1856      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)
1857          {
1858          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1859          length--;
1860          p++;
1861          }
1862      }      }
1863    #endif
1864      yield += pchar(c, f);
1865      }
1866    
1867    return yield;
1868    }
1869    #endif  /* SUPPORT_PCRE16 */
1870    
1871    
1872    
1873    #ifdef SUPPORT_PCRE32
1874    /*************************************************
1875    *           Print 32-bit character string        *
1876    *************************************************/
1877    
1878    /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1879    If handed a NULL file, just counts chars without printing. */
1880    
1881    static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
1882    {
1883    int yield = 0;
1884    
1885    if (length < 0)
1886      length = strlen32(p);
1887    
1888    while (length-- > 0)
1889      {
1890      pcre_uint32 c = *p++;
1891      yield += pchar(c, f);
1892    }    }
1893    
1894  return yield;  return yield;
1895  }  }
1896    #endif  /* SUPPORT_PCRE32 */
1897    
1898    
1899    
1900    #ifdef SUPPORT_PCRE8
1901    /*************************************************
1902    *     Read a capture name (8-bit) and check it   *
1903    *************************************************/
1904    
1905    static pcre_uint8 *
1906    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1907    {
1908    pcre_uint8 *npp = *pp;
1909    while (isalnum(*p)) *npp++ = *p++;
1910    *npp++ = 0;
1911    *npp = 0;
1912    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1913      {
1914      fprintf(outfile, "no parentheses with name \"");
1915      PCHARSV(*pp, 0, -1, outfile);
1916      fprintf(outfile, "\"\n");
1917      }
1918    
1919    *pp = npp;
1920    return p;
1921    }
1922    #endif  /* SUPPORT_PCRE8 */
1923    
1924    
1925    
1926    #ifdef SUPPORT_PCRE16
1927    /*************************************************
1928    *     Read a capture name (16-bit) and check it  *
1929    *************************************************/
1930    
1931    /* Note that the text being read is 8-bit. */
1932    
1933    static pcre_uint8 *
1934    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1935    {
1936    pcre_uint16 *npp = *pp;
1937    while (isalnum(*p)) *npp++ = *p++;
1938    *npp++ = 0;
1939    *npp = 0;
1940    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1941      {
1942      fprintf(outfile, "no parentheses with name \"");
1943      PCHARSV(*pp, 0, -1, outfile);
1944      fprintf(outfile, "\"\n");
1945      }
1946    *pp = npp;
1947    return p;
1948    }
1949    #endif  /* SUPPORT_PCRE16 */
1950    
1951    
1952    
1953    #ifdef SUPPORT_PCRE32
1954    /*************************************************
1955    *     Read a capture name (32-bit) and check it  *
1956    *************************************************/
1957    
1958    /* Note that the text being read is 8-bit. */
1959    
1960    static pcre_uint8 *
1961    read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
1962    {
1963    pcre_uint32 *npp = *pp;
1964    while (isalnum(*p)) *npp++ = *p++;
1965    *npp++ = 0;
1966    *npp = 0;
1967    if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
1968      {
1969      fprintf(outfile, "no parentheses with name \"");
1970      PCHARSV(*pp, 0, -1, outfile);
1971      fprintf(outfile, "\"\n");
1972      }
1973    *pp = npp;
1974    return p;
1975    }
1976    #endif  /* SUPPORT_PCRE32 */
1977    
1978    
1979    
# Line 329  if (callout_extra) Line 2002  if (callout_extra)
2002      else      else
2003        {        {
2004        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
2005        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
2006          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
2007        fprintf(f, "\n");        fprintf(f, "\n");
2008        }        }
# Line 342  printed lengths of the substrings. */ Line 2015  printed lengths of the substrings. */
2015    
2016  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
2017    
2018  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2019  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
2020    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
2021    
2022  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2023    
2024  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
2025    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
2026    
2027  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 385  fprintf(outfile, "%.*s", (cb->next_item_ Line 2058  fprintf(outfile, "%.*s", (cb->next_item_
2058  fprintf(outfile, "\n");  fprintf(outfile, "\n");
2059  first_callout = 0;  first_callout = 0;
2060    
2061    if (cb->mark != last_callout_mark)
2062      {
2063      if (cb->mark == NULL)
2064        fprintf(outfile, "Latest Mark: <unset>\n");
2065      else
2066        {
2067        fprintf(outfile, "Latest Mark: ");
2068        PCHARSV(cb->mark, 0, -1, outfile);
2069        putc('\n', outfile);
2070        }
2071      last_callout_mark = cb->mark;
2072      }
2073    
2074  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
2075    {    {
2076    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 404  return (cb->callout_number != callout_fa Line 2090  return (cb->callout_number != callout_fa
2090  *            Local malloc functions              *  *            Local malloc functions              *
2091  *************************************************/  *************************************************/
2092    
2093  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
2094  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
2095    show_malloc variable is set only during matching. */
2096    
2097  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
2098  {  {
2099  void *block = malloc(size);  void *block = malloc(size);
2100  gotten_store = size;  gotten_store = size;
2101    if (first_gotten_store == 0) first_gotten_store = size;
2102  if (show_malloc)  if (show_malloc)
2103    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
2104  return block;  return block;
# Line 423  if (show_malloc) Line 2111  if (show_malloc)
2111  free(block);  free(block);
2112  }  }
2113    
   
2114  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
2115    
2116  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 446  free(block); Line 2133  free(block);
2133  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
2134  *************************************************/  *************************************************/
2135    
2136  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
2137    one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2138    value, but the code is defensive.
2139    
2140  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  Arguments:
2141      re        compiled regex
2142      study     study data
2143      option    PCRE_INFO_xxx option
2144      ptr       where to put the data
2145    
2146    Returns:    0 when OK, < 0 on error
2147    */
2148    
2149    static int
2150    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2151  {  {
2152  int rc;  int rc;
2153  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
2154    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (pcre_mode == PCRE32_MODE)
2155    #ifdef SUPPORT_PCRE32
2156      rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2157    #else
2158      rc = PCRE_ERROR_BADMODE;
2159    #endif
2160    else if (pcre_mode == PCRE16_MODE)
2161    #ifdef SUPPORT_PCRE16
2162      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2163    #else
2164      rc = PCRE_ERROR_BADMODE;
2165    #endif
2166    else
2167    #ifdef SUPPORT_PCRE8
2168      rc = pcre_fullinfo(re, study, option, ptr);
2169    #else
2170      rc = PCRE_ERROR_BADMODE;
2171    #endif
2172    
2173    if (rc < 0)
2174      {
2175      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2176        pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2177      if (rc == PCRE_ERROR_BADMODE)
2178        fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2179          "%d-bit mode\n", 8 * CHAR_SIZE,
2180          8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2181      }
2182    
2183    return rc;
2184  }  }
2185    
2186    
2187    
2188  /*************************************************  /*************************************************
2189  *         Byte flipping function                 *  *             Swap byte functions                *
2190  *************************************************/  *************************************************/
2191    
2192  static long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2193  byteflip(long int value, int n)  value, respectively.
2194    
2195    Arguments:
2196      value        any number
2197    
2198    Returns:       the byte swapped value
2199    */
2200    
2201    static pcre_uint32
2202    swap_uint32(pcre_uint32 value)
2203  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
2204  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
2205         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
2206         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
2207         ((value & 0xff000000) >> 24);         (value >> 24);
2208  }  }
2209    
2210    static pcre_uint16
2211    swap_uint16(pcre_uint16 value)
2212    {
2213    return (value >> 8) | (value << 8);
2214    }
2215    
2216    
2217    
2218    /*************************************************
2219    *        Flip bytes in a compiled pattern        *
2220    *************************************************/
2221    
2222    /* This function is called if the 'F' option was present on a pattern that is
2223    to be written to a file. We flip the bytes of all the integer fields in the
2224    regex data block and the study block. In 16-bit mode this also flips relevant
2225    bytes in the pattern itself. This is to make it possible to test PCRE's
2226    ability to reload byte-flipped patterns, e.g. those compiled on a different
2227    architecture. */
2228    
#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
/* Byte-flip a compiled 8-bit or 16-bit pattern in place.

The header fields of the compiled block are always flipped, as are the size,
flags and minlength fields of the study data when "extra" is not NULL. When
16-bit support is compiled in and pcre_mode is PCRE16_MODE, every 16-bit unit
of the name table and of the compiled code is flipped as well, except for the
32-byte class bit maps, which are skipped.

Arguments:
  ere      the compiled pattern
  extra    the study block, or NULL

Returns:   nothing
*/

static void
regexflip8_or_16(pcre *ere, pcre_extra *extra)
{
real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
#ifdef SUPPORT_PCRE16
int op;

/* These two are read from the header before it is flipped below. "length" is
the number of 16-bit units still to be swapped before the next opcode; it
starts as the size of the name table. */

pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
BOOL utf = (re->options & PCRE_UTF16) != 0;
BOOL utf16_char = FALSE;
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_PCRE16 */

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
in the name table, if present, and then in the pattern itself. */

#ifdef SUPPORT_PCRE16
if (pcre_mode != PCRE16_MODE) return;

while(TRUE)
  {
  /* Swap previous characters. */
  while (length-- > 0)
    {
    *ptr = swap_uint16(*ptr);
    ptr++;
    }
#ifdef SUPPORT_UTF
  if (utf16_char)
    {
    /* ptr[-1] was byte-swapped by the loop above, so it has to be swapped
    back before it can be tested for being a high surrogate. Testing the
    swapped form misses real surrogates and wrongly matches values such as
    0x00d8. */

    if ((swap_uint16(ptr[-1]) & 0xfc00) == 0xd800)
      {
      /* We know that there is only one extra character in UTF-16. */
      *ptr = swap_uint16(*ptr);
      ptr++;
      }
    }
  utf16_char = FALSE;
#endif /* SUPPORT_UTF */

  /* Get next opcode. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint16(op);

  switch (op)
    {
    case OP_END:
    return;

#ifdef SUPPORT_UTF
    /* These opcodes are followed by a character; in UTF mode that character
    may take a second 16-bit unit, which is dealt with at the top of the
    loop after the fixed-length part has been swapped. */

    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    if (utf) utf16_char = TRUE;
#endif
    /* Fall through. */

    default:
    length = OP_lengths16[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint16);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can be 1 or 2 in 16 bit mode. The size is read here, before
    the units that hold it are reversed below. */
    if (LINK_SIZE > 1)
      length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
        - (1 + LINK_SIZE + 1));
    else
      length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint16(*ptr);
    ptr++;
    if (LINK_SIZE > 1)
      {
      *ptr = swap_uint16(*ptr);
      ptr++;
      }

    /* The next unit holds the XCLASS flags; "op" is reused for it. */

    op = *ptr;
    *ptr = swap_uint16(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint16);
      length -= 32/sizeof(pcre_uint16);
      }
    break;
    }
  }
/* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
#endif /* SUPPORT_PCRE[8|16] */
2409    
2410    
2411    
2412    #if defined SUPPORT_PCRE32
2413    static void
2414    regexflip_32(pcre *ere, pcre_extra *extra)
2415    {
2416    real_pcre32 *re = (real_pcre32 *)ere;
2417    int op;
2418    pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2419    int length = re->name_count * re->name_entry_size;
2420    #ifdef SUPPORT_UTF
2421    BOOL utf = (re->options & PCRE_UTF32) != 0;
2422    #endif /* SUPPORT_UTF */
2423    
2424    /* Always flip the bytes in the main data block and study blocks. */
2425    
2426    re->magic_number = REVERSED_MAGIC_NUMBER;
2427    re->size = swap_uint32(re->size);
2428    re->options = swap_uint32(re->options);
2429    re->flags = swap_uint16(re->flags);
2430    re->top_bracket = swap_uint16(re->top_bracket);
2431    re->top_backref = swap_uint16(re->top_backref);
2432    re->first_char = swap_uint32(re->first_char);
2433    re->req_char = swap_uint32(re->req_char);
2434    re->name_table_offset = swap_uint16(re->name_table_offset);
2435    re->name_entry_size = swap_uint16(re->name_entry_size);
2436    re->name_count = swap_uint16(re->name_count);
2437    
2438    if (extra != NULL)
2439      {
2440      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2441      rsd->size = swap_uint32(rsd->size);
2442      rsd->flags = swap_uint32(rsd->flags);
2443      rsd->minlength = swap_uint32(rsd->minlength);
2444      }
2445    
2446    /* In 32-bit mode we must swap bytes
2447    in the name table, if present, and then in the pattern itself. */
2448    
2449    while(TRUE)
2450      {
2451      /* Swap previous characters. */
2452      while (length-- > 0)
2453        {
2454        *ptr = swap_uint32(*ptr);
2455        ptr++;
2456        }
2457    
2458      /* Get next opcode. */
2459    
2460      length = 0;
2461      op = *ptr;
2462      *ptr++ = swap_uint32(op);
2463    
2464      switch (op)
2465        {
2466        case OP_END:
2467        return;
2468    
2469        default:
2470        length = OP_lengths32[op] - 1;
2471        break;
2472    
2473        case OP_CLASS:
2474        case OP_NCLASS:
2475        /* Skip the character bit map. */
2476        ptr += 32/sizeof(pcre_uint32);
2477        length = 0;
2478        break;
2479    
2480        case OP_XCLASS:
2481        /* LINK_SIZE can only be 1 in 32-bit mode. */
2482        length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2483    
2484        /* Reverse the size of the XCLASS instance. */
2485        *ptr = swap_uint32(*ptr);
2486        ptr++;
2487    
2488        op = *ptr;
2489        *ptr = swap_uint32(op);
2490        ptr++;
2491        if ((op & XCL_MAP) != 0)
2492          {
2493          /* Skip the character bit map. */
2494          ptr += 32/sizeof(pcre_uint32);
2495          length -= 32/sizeof(pcre_uint32);
2496          }
2497        break;
2498        }
2499      }
2500    /* Control should never reach here in 32 bit mode. */
2501    }
2502    
2503    #endif /* SUPPORT_PCRE32 */
2504    
2505    
2506    
2507    static void
2508    regexflip(pcre *ere, pcre_extra *extra)
2509    {
2510    #if defined SUPPORT_PCRE32
2511      if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2512        regexflip_32(ere, extra);
2513    #endif
2514    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2515      if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2516        regexflip8_or_16(ere, extra);
2517    #endif
2518    }
2519    
2520    
2521    
# Line 479  return ((value & 0x000000ff) << 24) | Line 2524  return ((value & 0x000000ff) << 24) |
2524  *************************************************/  *************************************************/
2525    
2526  static int  static int
2527  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2528    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2529    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2530  {  {
# Line 494  for (;;) Line 2539  for (;;)
2539    {    {
2540    *limit = mid;    *limit = mid;
2541    
2542    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2543      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2544    
2545    if (count == errnumber)    if (count == errnumber)
# Line 526  return count; Line 2571  return count;
2571    
2572    
2573  /*************************************************  /*************************************************
2574    *         Case-independent strncmp() function    *
2575    *************************************************/
2576    
2577    /*
2578    Arguments:
2579      s         first string
2580      t         second string
2581      n         number of characters to compare
2582    
2583    Returns:    < 0, = 0, or > 0, according to the comparison
2584    */
2585    
2586    static int
2587    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2588    {
2589    while (n--)
2590      {
2591      int c = tolower(*s++) - tolower(*t++);
2592      if (c) return c;
2593      }
2594    return 0;
2595    }
2596    
2597    
2598    
2599    /*************************************************
2600    *         Check newline indicator                *
2601    *************************************************/
2602    
2603    /* This is used both at compile and run-time to check for <xxx> escapes. Print
2604    a message and return 0 if there is no match.
2605    
2606    Arguments:
2607      p           points after the leading '<'
2608      f           file for error message
2609    
2610    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2611    */
2612    
2613    static int
2614    check_newline(pcre_uint8 *p, FILE *f)
2615    {
2616    if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2617    if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2618    if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2619    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2620    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2621    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2622    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2623    fprintf(f, "Unknown newline type at: <%s\n", p);
2624    return 0;
2625    }
2626    
2627    
2628    
2629    /*************************************************
2630    *             Usage function                     *
2631    *************************************************/
2632    
/* Write the command line summary to stdout. Options that exist only in some
builds (-16, -32, -dfa, -p) are listed only when the corresponding support is
compiled in. The list of -C arguments is intended to match the names that the
-C handling in main() accepts. */

static void
usage(void)
{
printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE16
printf("  -16      use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf("  -32      use the 32-bit library\n");
#endif
printf("  -b       show compiled code\n");
printf("  -C       show PCRE compile-time options and exit\n");
printf("  -C arg   show a specific compile-time option\n");
printf("           and exit with its value. The arg can be:\n");
printf("     linksize     internal link size [2, 3, 4]\n");
printf("     pcre8        8 bit library support enabled [0, 1]\n");
printf("     pcre16       16 bit library support enabled [0, 1]\n");
printf("     pcre32       32 bit library support enabled [0, 1]\n");
printf("     utf          Unicode Transformation Format supported [0, 1]\n");
printf("     ucp          Unicode Properties supported [0, 1]\n");
printf("     jit          Just-in-time compiler supported [0, 1]\n");
printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
printf("     ebcdic       compiled for an EBCDIC environment [0, 1]\n");
printf("     ebcdic-nl    code for LF in an EBCDIC environment, otherwise 0\n");
printf("  -d       debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf("  -dfa     force DFA matching for all subjects\n");
#endif
printf("  -help    show usage information\n");
printf("  -i       show information about compiled patterns\n"
       "  -M       find MATCH_LIMIT minimum for each subject\n"
       "  -m       output memory used information\n"
       "  -o <n>   set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf("  -p       use POSIX interface\n");
#endif
printf("  -q       quiet: do not output PCRE version number at start\n");
printf("  -S <n>   set stack size to <n> megabytes\n");
printf("  -s       force each pattern to be studied at basic level\n"
       "  -s+      force each pattern to be studied, using JIT if available\n"
       "  -s++     ditto, verifying when JIT was actually used\n"
       "  -s+n     force each pattern to be studied, using JIT if available,\n"
       "             where 1 <= n <= 7 selects JIT options\n"
       "  -s++n    ditto, verifying when JIT was actually used\n"
       "  -t       time compilation and execution\n");
printf("  -t <n>   time compilation and execution, repeating <n> times\n");
printf("  -tm      time execution (matching) only\n");
printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
}
2687    
2688    
2689    
2690    /*************************************************
2691  *                Main Program                    *  *                Main Program                    *
2692  *************************************************/  *************************************************/
2693    
# Line 536  options, followed by a set of test data, Line 2698  options, followed by a set of test data,
2698  int main(int argc, char **argv)  int main(int argc, char **argv)
2699  {  {
2700  FILE *infile = stdin;  FILE *infile = stdin;
2701    const char *version;
2702  int options = 0;  int options = 0;
2703  int study_options = 0;  int study_options = 0;
2704    int default_find_match_limit = FALSE;
2705  int op = 1;  int op = 1;
2706  int timeit = 0;  int timeit = 0;
2707    int timeitm = 0;
2708  int showinfo = 0;  int showinfo = 0;
2709  int showstore = 0;  int showstore = 0;
2710    int force_study = -1;
2711    int force_study_options = 0;
2712  int quiet = 0;  int quiet = 0;
2713  int size_offsets = 45;  int size_offsets = 45;
2714  int size_offsets_max;  int size_offsets_max;
2715  int *offsets = NULL;  int *offsets = NULL;
2716    int debug = 0;
2717    int done = 0;
2718    int all_use_dfa = 0;
2719    int verify_jit = 0;
2720    int yield = 0;
2721    int stack_size;
2722    
2723  #if !defined NOPOSIX  #if !defined NOPOSIX
2724  int posix = 0;  int posix = 0;
2725  #endif  #endif
2726  int debug = 0;  #if !defined NODFA
2727  int done = 0;  int *dfa_workspace = NULL;
2728  int all_use_dfa = 0;  #endif
2729  int yield = 0;  
2730    pcre_jit_stack *jit_stack = NULL;
2731    
2732    /* These vectors store, end-to-end, a list of zero-terminated captured
2733    substring names, each list itself being terminated by an empty name. Assume
2734    that 1024 is plenty long enough for the few names we'll be testing. It is
2735    easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2736    for the actual memory, to ensure alignment. */
2737    
2738    pcre_uint32 copynames[1024];
2739    pcre_uint32 getnames[1024];
2740    
2741    #ifdef SUPPORT_PCRE32
2742    pcre_uint32 *cn32ptr;
2743    pcre_uint32 *gn32ptr;
2744    #endif
2745    
2746    #ifdef SUPPORT_PCRE16
2747    pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2748    pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2749    pcre_uint16 *cn16ptr;
2750    pcre_uint16 *gn16ptr;
2751    #endif
2752    
2753    #ifdef SUPPORT_PCRE8
2754    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2755    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2756    pcre_uint8 *cn8ptr;
2757    pcre_uint8 *gn8ptr;
2758    #endif
2759    
2760    /* Get buffers from malloc() so that valgrind will check their misuse when
2761    debugging. They grow automatically when very long lines are read. The 16-
2762    and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2763    
2764    buffer = (pcre_uint8 *)malloc(buffer_size);
2765    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2766    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2767    
2768  unsigned char *buffer;  /* The outfile variable is static so that new_malloc can use it. */
 unsigned char *dbuffer;  
2769    
2770  /* Get buffers from malloc() so that Electric Fence will check their misuse  outfile = stdout;
 when I am debugging. */  
2771    
2772  buffer = (unsigned char *)malloc(BUFFER_SIZE);  /* The following  _setmode() stuff is some Windows magic that tells its runtime
2773  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  library to translate CRLF into a single LF character. At least, that's what
2774  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  I've been told: never having used Windows I take this all on trust. Originally
2775    it set 0x8000, but then I was advised that _O_BINARY was better. */
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
2776    
2777  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32)
2778  _setmode( _fileno( stdout ), 0x8000 );  _setmode( _fileno( stdout ), _O_BINARY );
2779  #endif  /* defined(_WIN32) || defined(WIN32) */  #endif
2780    
2781  outfile = stdout;  /* Get the version number: both pcre_version() and pcre16_version() give the
2782    same answer. We just need to ensure that we call one that is available. */
2783    
2784    #if defined SUPPORT_PCRE8
2785    version = pcre_version();
2786    #elif defined SUPPORT_PCRE16
2787    version = pcre16_version();
2788    #elif defined SUPPORT_PCRE32
2789    version = pcre32_version();
2790    #endif
2791    
2792  /* Scan options */  /* Scan options */
2793    
2794  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2795    {    {
2796    unsigned char *endptr;    pcre_uint8 *endptr;
2797      char *arg = argv[op];
2798    
2799      if (strcmp(arg, "-m") == 0) showstore = 1;
2800      else if (strcmp(arg, "-s") == 0) force_study = 0;
2801    
2802    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    else if (strncmp(arg, "-s+", 3) == 0)
2803      showstore = 1;      {
2804    else if (strcmp(argv[op], "-t") == 0) timeit = 1;      arg += 3;
2805    else if (strcmp(argv[op], "-q") == 0) quiet = 1;      if (*arg == '+') { arg++; verify_jit = TRUE; }
2806    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      force_study = 1;
2807    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      if (*arg == 0)
2808          force_study_options = jit_study_bits[6];
2809        else if (*arg >= '1' && *arg <= '7')
2810          force_study_options = jit_study_bits[*arg - '1'];
2811        else goto BAD_ARG;
2812        }
2813      else if (strcmp(arg, "-16") == 0)
2814        {
2815    #ifdef SUPPORT_PCRE16
2816        pcre_mode = PCRE16_MODE;
2817    #else
2818        printf("** This version of PCRE was built without 16-bit support\n");
2819        exit(1);
2820    #endif
2821        }
2822      else if (strcmp(arg, "-32") == 0)
2823        {
2824    #ifdef SUPPORT_PCRE32
2825        pcre_mode = PCRE32_MODE;
2826    #else
2827        printf("** This version of PCRE was built without 32-bit support\n");
2828        exit(1);
2829    #endif
2830        }
2831      else if (strcmp(arg, "-q") == 0) quiet = 1;
2832      else if (strcmp(arg, "-b") == 0) debug = 1;
2833      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2834      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2835      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2836  #if !defined NODFA  #if !defined NODFA
2837    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2838  #endif  #endif
2839    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2840        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2841            *endptr == 0))
2842        {
2843        op++;
2844        argc--;
2845        }
2846      else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2847        {
2848        int both = arg[2] == 0;
2849        int temp;
2850        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2851                         *endptr == 0))
2852          {
2853          timeitm = temp;
2854          op++;
2855          argc--;
2856          }
2857        else timeitm = LOOPREPEAT;
2858        if (both) timeit = timeitm;
2859        }
2860      else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2861          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2862          *endptr == 0))          *endptr == 0))
2863      {      {
2864    #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2865        printf("PCRE: -S not supported on this OS\n");
2866        exit(1);
2867    #else
2868        int rc;
2869        struct rlimit rlim;
2870        getrlimit(RLIMIT_STACK, &rlim);
2871        rlim.rlim_cur = stack_size * 1024 * 1024;
2872        rc = setrlimit(RLIMIT_STACK, &rlim);
2873        if (rc != 0)
2874          {
2875        printf("PCRE: setrlimit() failed with error %d\n", rc);
2876        exit(1);
2877          }
2878      op++;      op++;
2879      argc--;      argc--;
2880    #endif
2881      }      }
2882  #if !defined NOPOSIX  #if !defined NOPOSIX
2883    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2884  #endif  #endif
2885    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2886      {      {
2887      int rc;      int rc;
2888      printf("PCRE version %s\n", pcre_version());      unsigned long int lrc;
2889    
2890        if (argc > 2)
2891          {
2892          if (strcmp(argv[op + 1], "linksize") == 0)
2893            {
2894            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2895            printf("%d\n", rc);
2896            yield = rc;
2897            }
2898          else if (strcmp(argv[op + 1], "pcre8") == 0)
2899            {
2900    #ifdef SUPPORT_PCRE8
2901            printf("1\n");
2902            yield = 1;
2903    #else
2904            printf("0\n");
2905            yield = 0;
2906    #endif
2907            }
2908          else if (strcmp(argv[op + 1], "pcre16") == 0)
2909            {
2910    #ifdef SUPPORT_PCRE16
2911            printf("1\n");
2912            yield = 1;
2913    #else
2914            printf("0\n");
2915            yield = 0;
2916    #endif
2917            }
2918          else if (strcmp(argv[op + 1], "pcre32") == 0)
2919            {
2920    #ifdef SUPPORT_PCRE32
2921            printf("1\n");
2922            yield = 1;
2923    #else
2924            printf("0\n");
2925            yield = 0;
2926    #endif
2927            goto EXIT;
2928            }
2929          if (strcmp(argv[op + 1], "utf") == 0)
2930            {
2931    #ifdef SUPPORT_PCRE8
2932            if (pcre_mode == PCRE8_MODE)
2933              (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2934    #endif
2935    #ifdef SUPPORT_PCRE16
2936            if (pcre_mode == PCRE16_MODE)
2937              (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2938    #endif
2939    #ifdef SUPPORT_PCRE32
2940            if (pcre_mode == PCRE32_MODE)
2941              (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2942    #endif
2943            printf("%d\n", rc);
2944            yield = rc;
2945            goto EXIT;
2946            }
2947          else if (strcmp(argv[op + 1], "ucp") == 0)
2948            {
2949            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2950            printf("%d\n", rc);
2951            yield = rc;
2952            }
2953          else if (strcmp(argv[op + 1], "jit") == 0)
2954            {
2955            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2956            printf("%d\n", rc);
2957            yield = rc;
2958            }
2959          else if (strcmp(argv[op + 1], "newline") == 0)
2960            {
2961            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2962            print_newline_config(rc, TRUE);
2963            }
2964          else if (strcmp(argv[op + 1], "ebcdic") == 0)
2965            {
2966    #ifdef EBCDIC
2967            printf("1\n");
2968            yield = 1;
2969    #else
2970            printf("0\n");
2971    #endif
2972            }
2973          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2974            {
2975    #ifdef EBCDIC
2976            printf("0x%02x\n", CHAR_LF);
2977    #else
2978            printf("0\n");
2979    #endif
2980            }
2981          else
2982            {
2983            printf("Unknown -C option: %s\n", argv[op + 1]);
2984            }
2985          goto EXIT;
2986          }
2987    
2988        /* No argument for -C: output all configuration information. */
2989    
2990        printf("PCRE version %s\n", version);
2991      printf("Compiled with\n");      printf("Compiled with\n");
2992    
2993    #ifdef EBCDIC
2994        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2995    #endif
2996    
2997    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2998    are set, either both UTFs are supported or both are not supported. */
2999    
3000    #ifdef SUPPORT_PCRE8
3001        printf("  8-bit support\n");
3002      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3003      printf("  %sUTF-8 support\n", rc? "" : "No ");        printf ("  %sUTF-8 support\n", rc ? "" : "No ");
3004      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #endif
3005    #ifdef SUPPORT_PCRE16
3006        printf("  16-bit support\n");
3007        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3008        printf ("  %sUTF-16 support\n", rc ? "" : "No ");
3009    #endif
3010    #ifdef SUPPORT_PCRE32
3011        printf("  32-bit support\n");
3012        (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3013        printf ("  %sUTF-32 support\n", rc ? "" : "No ");
3014    #endif
3015    
3016        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3017      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
3018      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3019      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      if (rc)
3020      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);        {
3021          const char *arch;
3022          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3023          printf("  Just-in-time compiler support: %s\n", arch);
3024          }
3025        else
3026          printf("  No just-in-time compiler support\n");
3027        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3028        print_newline_config(rc, FALSE);
3029        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3030        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3031                                         "all Unicode newlines");
3032        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3033      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
3034      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3035      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
3036      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3037      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
3038      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3039      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
3040      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3041      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
3042      exit(0);      if (showstore)
3043          {
3044          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3045          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3046          }
3047        printf("\n");
3048        goto EXIT;
3049        }
3050      else if (strcmp(arg, "-help") == 0 ||
3051               strcmp(arg, "--help") == 0)
3052        {
3053        usage();
3054        goto EXIT;
3055      }      }
3056    else    else
3057      {      {
3058      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
3059      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("** Unknown or malformed option %s\n", arg);
3060      printf("  -C     show PCRE compile-time options and exit\n");      usage();
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
3061      yield = 1;      yield = 1;
3062      goto EXIT;      goto EXIT;
3063      }      }
# Line 653  offsets = (int *)malloc(size_offsets_max Line 3072  offsets = (int *)malloc(size_offsets_max
3072  if (offsets == NULL)  if (offsets == NULL)
3073    {    {
3074    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
3075      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
3076    yield = 1;    yield = 1;
3077    goto EXIT;    goto EXIT;
3078    }    }
# Line 662  if (offsets == NULL) Line 3081  if (offsets == NULL)
3081    
3082  if (argc > 1)  if (argc > 1)
3083    {    {
3084    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
3085    if (infile == NULL)    if (infile == NULL)
3086      {      {
3087      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 673  if (argc > 1) Line 3092  if (argc > 1)
3092    
3093  if (argc > 2)  if (argc > 2)
3094    {    {
3095    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
3096    if (outfile == NULL)    if (outfile == NULL)
3097      {      {
3098      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 684  if (argc > 2) Line 3103  if (argc > 2)
3103    
3104  /* Set alternative malloc function */  /* Set alternative malloc function */
3105    
3106    #ifdef SUPPORT_PCRE8
3107  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
3108  pcre_free = new_free;  pcre_free = new_free;
3109  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
3110  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
3111    #endif
3112    
3113    #ifdef SUPPORT_PCRE16
3114    pcre16_malloc = new_malloc;
3115    pcre16_free = new_free;
3116    pcre16_stack_malloc = stack_malloc;
3117    pcre16_stack_free = stack_free;
3118    #endif
3119    
3120    #ifdef SUPPORT_PCRE32
3121    pcre32_malloc = new_malloc;
3122    pcre32_free = new_free;
3123    pcre32_stack_malloc = stack_malloc;
3124    pcre32_stack_free = stack_free;
3125    #endif
3126    
3127  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
3128    
3129  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3130    
3131  /* Main loop */  /* Main loop */
3132    
# Line 706  while (!done) Line 3141  while (!done)
3141  #endif  #endif
3142    
3143    const char *error;    const char *error;
3144    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
3145    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
3146    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
3147      const pcre_uint8 *tables = NULL;
3148      unsigned long int get_options;
3149    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
3150    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
3151      int do_allcaps = 0;
3152      int do_mark = 0;
3153    int do_study = 0;    int do_study = 0;
3154      int no_force_study = 0;
3155    int do_debug = debug;    int do_debug = debug;
3156    int do_G = 0;    int do_G = 0;
3157    int do_g = 0;    int do_g = 0;
3158    int do_showinfo = showinfo;    int do_showinfo = showinfo;
3159    int do_showrest = 0;    int do_showrest = 0;
3160      int do_showcaprest = 0;
3161    int do_flip = 0;    int do_flip = 0;
3162    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
3163    
3164    #if !defined NODFA
3165      int dfa_matched = 0;
3166    #endif
3167    
3168    use_utf8 = 0;    use_utf = 0;
3169      debug_lengths = 1;
3170    
3171    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
3172    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3173    fflush(outfile);    fflush(outfile);
3174    
# Line 735  while (!done) Line 3180  while (!done)
3180    
3181    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3182      {      {
3183      unsigned long int magic;      pcre_uint32 magic;
3184      uschar sbuf[8];      pcre_uint8 sbuf[8];
3185      FILE *f;      FILE *f;
3186    
3187      p++;      p++;
3188        if (*p == '!')
3189          {
3190          do_debug = TRUE;
3191          do_showinfo = TRUE;
3192          p++;
3193          }
3194    
3195      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
3196      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
3197      *pp = 0;      *pp = 0;
# Line 751  while (!done) Line 3203  while (!done)
3203        continue;        continue;
3204        }        }
3205    
3206        first_gotten_store = 0;
3207      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3208    
3209      true_size =      true_size =
# Line 758  while (!done) Line 3211  while (!done)
3211      true_study_size =      true_study_size =
3212        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3213    
3214      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
3215      regex_gotten_store = gotten_store;      if (re == NULL)
3216          {
3217          printf("** Failed to get %d bytes of memory for pcre object\n",
3218            (int)true_size);
3219          yield = 1;
3220          goto EXIT;
3221          }
3222        regex_gotten_store = first_gotten_store;
3223    
3224      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3225    
3226      magic = ((real_pcre *)re)->magic_number;      magic = REAL_PCRE_MAGIC(re);
3227      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
3228        {        {
3229        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
3230          {          {
3231          do_flip = 1;          do_flip = 1;
3232          }          }
3233        else        else
3234          {          {
3235          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3236            new_free(re);
3237          fclose(f);          fclose(f);
3238          continue;          continue;
3239          }          }
3240        }        }
3241    
3242      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
3243        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3244          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &options);  
     use_utf8 = (options & PCRE_UTF8) != 0;  
3245    
3246      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
3247    
3248      if (true_study_size != 0)      if (true_study_size != 0)
3249        {        {
# Line 802  while (!done) Line 3259  while (!done)
3259          {          {
3260          FAIL_READ:          FAIL_READ:
3261          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
3262          if (extra != NULL) new_free(extra);          if (extra != NULL)
3263          if (re != NULL) new_free(re);            {
3264              PCRE_FREE_STUDY(extra);
3265              }
3266            new_free(re);
3267          fclose(f);          fclose(f);
3268          continue;          continue;
3269          }          }
# Line 812  while (!done) Line 3272  while (!done)
3272        }        }
3273      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
3274    
3275        /* Flip the necessary bytes. */
3276        if (do_flip)
3277          {
3278          int rc;
3279          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3280          if (rc == PCRE_ERROR_BADMODE)
3281            {
3282            /* Simulate the result of the function call below. */
3283            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3284              pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3285              PCRE_INFO_OPTIONS);
3286            fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3287              "%d-bit mode\n", 8 * CHAR_SIZE,
3288              8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3289            new_free(re);
3290            fclose(f);
3291            continue;
3292            }
3293          }
3294    
3295        /* Need to know if UTF-8 for printing data strings. */
3296    
3297        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3298          {
3299          new_free(re);
3300          fclose(f);
3301          continue;
3302          }
3303        use_utf = (get_options & PCRE_UTF8) != 0;
3304    
3305      fclose(f);      fclose(f);
3306      goto SHOW_INFO;      goto SHOW_INFO;
3307      }      }
3308    
3309    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
3310    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
3311    
3312    delimiter = *p++;    delimiter = *p++;
3313    
3314    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
3315      {      {
3316      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3317      goto SKIP_DATA;      goto SKIP_DATA;
3318      }      }
3319    
3320    pp = p;    pp = p;
3321      poffset = (int)(p - buffer);
3322    
3323    for(;;)    for(;;)
3324      {      {
# Line 838  while (!done) Line 3329  while (!done)
3329        pp++;        pp++;
3330        }        }
3331      if (*pp != 0) break;      if (*pp != 0) break;
3332        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
3333        {        {
3334        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
3335        done = 1;        done = 1;
# Line 856  while (!done) Line 3338  while (!done)
3338      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3339      }      }
3340    
3341      /* The buffer may have moved while being extended; reset the start of data
3342      pointer to the correct relative point in the buffer. */
3343    
3344      p = buffer + poffset;
3345    
3346    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
3347    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
3348    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 871  while (!done) Line 3358  while (!done)
3358    /* Look for options after final delimiter */    /* Look for options after final delimiter */
3359    
3360    options = 0;    options = 0;
3361    study_options = 0;    study_options = force_study_options;
3362    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
3363    
3364    while (*pp != 0)    while (*pp != 0)
# Line 885  while (!done) Line 3372  while (!done)
3372        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
3373        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
3374    
3375        case '+': do_showrest = 1; break;        case '+':
3376          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3377          break;
3378    
3379          case '=': do_allcaps = 1; break;
3380        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
3381          case 'B': do_debug = 1; break;
3382        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
3383        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
3384        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3385        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
3386        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
3387        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
3388          case 'J': options |= PCRE_DUPNAMES; break;
3389          case 'K': do_mark = 1; break;
3390        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
3391        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3392    
# Line 900  while (!done) Line 3394  while (!done)
3394        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
3395  #endif  #endif
3396    
3397        case 'S': do_study = 1; break;        case 'S':
3398          do_study = 1;
3399          for (;;)
3400            {
3401            switch (*pp++)
3402              {
3403              case 'S':
3404              do_study = 0;
3405              no_force_study = 1;
3406              break;
3407    
3408              case '!':
3409              study_options |= PCRE_STUDY_EXTRA_NEEDED;
3410              break;
3411    
3412              case '+':
3413              if (*pp == '+')
3414                {
3415                verify_jit = TRUE;
3416                pp++;
3417                }
3418              if (*pp >= '1' && *pp <= '7')
3419                study_options |= jit_study_bits[*pp++ - '1'];
3420              else
3421                study_options |= jit_study_bits[6];
3422              break;
3423    
3424              case '-':
3425              study_options &= ~PCRE_STUDY_ALLJIT;
3426              break;
3427    
3428              default:
3429              pp--;
3430              goto ENDLOOP;
3431              }
3432            }
3433          ENDLOOP:
3434          break;
3435    
3436        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
3437          case 'W': options |= PCRE_UCP; break;
3438        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
3439        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3440          case 'Z': debug_lengths = 0; break;
3441          case '8': options |= PCRE_UTF8; use_utf = 1; break;
3442        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
3443    
3444          case 'T':
3445          switch (*pp++)
3446            {
3447            case '0': tables = tables0; break;
3448            case '1': tables = tables1; break;
3449    
3450            case '\r':
3451            case '\n':
3452            case ' ':
3453            case 0:
3454            fprintf(outfile, "** Missing table number after /T\n");
3455            goto SKIP_DATA;
3456    
3457            default:
3458            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3459            goto SKIP_DATA;
3460            }
3461          break;
3462    
3463        case 'L':        case 'L':
3464        ppp = pp;        ppp = pp;
3465        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
3466        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
3467          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3468        *ppp = 0;        *ppp = 0;
3469        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3470          {          {
3471          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3472          goto SKIP_DATA;          goto SKIP_DATA;
3473          }          }
3474        tables = pcre_maketables();        locale_set = 1;
3475          tables = PCRE_MAKETABLES;
3476        pp = ppp;        pp = ppp;
3477        break;        break;
3478    
# Line 927  while (!done) Line 3483  while (!done)
3483        *pp = 0;        *pp = 0;
3484        break;        break;
3485    
3486          case '<':
3487            {
3488            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3489              {
3490              options |= PCRE_JAVASCRIPT_COMPAT;
3491              pp += 3;
3492              }
3493            else
3494              {
3495              int x = check_newline(pp, outfile);
3496              if (x == 0) goto SKIP_DATA;
3497              options |= x;
3498              while (*pp++ != '>');
3499              }
3500            }
3501          break;
3502    
3503        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
3504        case '\n':        case '\n':
3505        case ' ':        case ' ':
# Line 940  while (!done) Line 3513  while (!done)
3513    
3514    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
3515    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
3516    local character tables. */    local character tables. Neither does it have 16-bit support. */
3517    
3518  #if !defined NOPOSIX  #if !defined NOPOSIX
3519    if (posix || do_posix)    if (posix || do_posix)
# Line 953  while (!done) Line 3526  while (!done)
3526      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3527      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3528      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3529        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3530        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3531    
3532        first_gotten_store = 0;
3533      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
3534    
3535      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 961  while (!done) Line 3537  while (!done)
3537    
3538      if (rc != 0)      if (rc != 0)
3539        {        {
3540        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3541        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3542        goto SKIP_DATA;        goto SKIP_DATA;
3543        }        }
# Line 973  while (!done) Line 3549  while (!done)
3549  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
3550    
3551      {      {
3552      if (timeit)      /* In 16- or 32-bit mode, convert the input. */
3553    
3554    #ifdef SUPPORT_PCRE16
3555        if (pcre_mode == PCRE16_MODE)
3556          {
3557          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3558            {
3559            case -1:
3560            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3561              "converted to UTF-16\n");
3562            goto SKIP_DATA;
3563    
3564            case -2:
3565            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3566              "cannot be converted to UTF-16\n");
3567            goto SKIP_DATA;
3568    
3569            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3570            fprintf(outfile, "**Failed: character value greater than 0xffff "
3571              "cannot be converted to 16-bit in non-UTF mode\n");
3572            goto SKIP_DATA;
3573    
3574            default:
3575            break;
3576            }
3577          p = (pcre_uint8 *)buffer16;
3578          }
3579    #endif
3580    
3581    #ifdef SUPPORT_PCRE32
3582        if (pcre_mode == PCRE32_MODE)
3583          {
3584          switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3585            {
3586            case -1:
3587            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3588              "converted to UTF-32\n");
3589            goto SKIP_DATA;
3590    
3591            case -2:
3592            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3593              "cannot be converted to UTF-32\n");
3594            goto SKIP_DATA;
3595    
3596            case -3:
3597            fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3598            goto SKIP_DATA;
3599    
3600            default:
3601            break;
3602            }
3603          p = (pcre_uint8 *)buffer32;
3604          }
3605    #endif
3606    
3607        /* Compile many times when timing */
3608    
3609        if (timeit > 0)
3610        {        {
3611        register int i;        register int i;
3612        clock_t time_taken;        clock_t time_taken;
3613        clock_t start_time = clock();        clock_t start_time = clock();
3614        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
3615          {          {
3616          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3617          if (re != NULL) free(re);          if (re != NULL) free(re);
3618          }          }
3619        time_taken = clock() - start_time;        time_taken = clock() - start_time;
3620        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
3621          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
3622            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
3623        }        }
3624    
3625      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
3626        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3627    
3628      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3629      if non-interactive. */      if non-interactive. */
# Line 1002  while (!done) Line 3636  while (!done)
3636          {          {
3637          for (;;)          for (;;)
3638            {            {
3639            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
3640              {              {
3641              done = 1;              done = 1;
3642              goto CONTINUE;              goto CONTINUE;
# Line 1016  while (!done) Line 3650  while (!done)
3650        goto CONTINUE;        goto CONTINUE;
3651        }        }
3652    
3653      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3654      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
3655      returns only limited data. Check that it agrees with the newer one. */      lines. */
3656    
3657      if (log_store)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3658        fprintf(outfile, "Memory allocation (code space): %d\n",        goto SKIP_DATA;
3659          (int)(gotten_store -      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3660    
3661      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3662      and remember the store that was got. */      and remember the store that was got. */
3663    
3664      true_size = ((real_pcre *)re)->size;      true_size = REAL_PCRE_SIZE(re);
3665      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3666    
3667        /* Output code size information if requested */
3668    
3669        if (log_store)
3670          {
3671          int name_count, name_entry_size, real_pcre_size;
3672    
3673          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3674          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3675    #ifdef SUPPORT_PCRE8
3676          if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3677            real_pcre_size = sizeof(real_pcre);
3678    #endif
3679    #ifdef SUPPORT_PCRE16
3680          if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3681            real_pcre_size = sizeof(real_pcre16);
3682    #endif
3683    #ifdef SUPPORT_PCRE32
3684          if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3685            real_pcre_size = sizeof(real_pcre32);
3686    #endif
3687          fprintf(outfile, "Memory allocation (code space): %d\n",
3688            (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3689          }
3690    
3691      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3692      help with the matching. */      help with the matching, unless the pattern has the SS option, which
3693        suppresses the effect of /S (used for a few test patterns where studying is
3694        never sensible). */
3695    
3696      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
3697        {        {
3698        if (timeit)        if (timeit > 0)
3699          {          {
3700          register int i;          register int i;
3701          clock_t time_taken;          clock_t time_taken;
3702          clock_t start_time = clock();          clock_t start_time = clock();
3703          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
3704            extra = pcre_study(re, study_options, &error);            {
3705              PCRE_STUDY(extra, re, study_options, &error);
3706              }
3707          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3708          if (extra != NULL) free(extra);          if (extra != NULL)
3709          fprintf(outfile, "  Study time %.3f milliseconds\n",            {
3710            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            PCRE_FREE_STUDY(extra);
3711              }
3712            fprintf(outfile, "  Study time %.4f milliseconds\n",
3713              (((double)time_taken * 1000.0) / (double)timeit) /
3714              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3715          }          }
3716        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3717        if (error != NULL)        if (error != NULL)
3718          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3719        else if (extra != NULL)        else if (extra != NULL)
3720            {
3721          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3722            if (log_store)
3723              {
3724              size_t jitsize;
3725              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3726                  jitsize != 0)
3727                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3728              }
3729            }
3730        }        }
3731    
3732      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
3733    
3734      if (do_flip)      if (do_mark)
3735        {        {
3736        real_pcre *rre = (real_pcre *)re;        if (extra == NULL)
       rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));  
   
       if (extra != NULL)  
3737          {          {
3738          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3739          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          extra->flags = 0;
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
3740          }          }
3741          extra->mark = &markptr;
3742          extra->flags |= PCRE_EXTRA_MARK;
3743        }        }
3744    
3745      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
3746    
3747      SHOW_INFO:      SHOW_INFO:
3748