/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 9 by nigel, Sat Feb 24 21:38:13 2007 UTC revision 1087 by chpe, Tue Oct 16 15:55:38 2012 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    #ifdef HAVE_CONFIG_H
50    #include "config.h"
51    #endif
52    
53  #include <ctype.h>  #include <ctype.h>
54  #include <stdio.h>  #include <stdio.h>
55  #include <string.h>  #include <string.h>
56  #include <stdlib.h>  #include <stdlib.h>
57  #include <time.h>  #include <time.h>
58    #include <locale.h>
59    #include <errno.h>
60    
61    /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68    #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71    #include <readline/readline.h>
72    #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80    #endif
81    
82  /* Use the internal info for displaying the results of pcre_study(). */  /* A number of things vary for Windows builds. Originally, pcretest opened its
83    input and output without "b"; then I was told that "b" was needed in some
84    environments, so it was added for release 5.0 to both the input and output. (It
85    makes no difference on Unix-like systems.) Later I was told that it is wrong
86    for the input on Windows. I've now abstracted the modes into two macros that
87    are set here, to make it easier to fiddle with them, and removed "b" from the
88    input mode under Windows. */
89    
90    #if defined(_WIN32) || defined(WIN32)
91    #include <io.h>                /* For _setmode() */
92    #include <fcntl.h>             /* For _O_BINARY */
93    #define INPUT_MODE   "r"
94    #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104  #include "internal.h"  /* A user sent this fix for Borland Builder 5 under Windows. */
105  #include "pcreposix.h"  
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
 #ifndef CLOCKS_PER_SEC  
 #ifdef CLK_TCK  
 #define CLOCKS_PER_SEC CLK_TCK  
112  #else  #else
113  #define CLOCKS_PER_SEC 100  #include <sys/time.h>          /* These two includes are needed */
114    #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119    #define INPUT_MODE   "rb"
120    #define OUTPUT_MODE  "wb"
121  #endif  #endif
122  #endif  #endif
123    
124    #define PRIV(name) name
125    
126  static FILE *outfile;  /* We have to include pcre_internal.h because we need the internal info for
127  static int log_store = 0;  displaying the results of pcre_study() and we also need to know about the
128    internal macros, structures, and other internal data values; pcretest has
129    "inside information" compared to a program that strictly follows the PCRE API.
130    
131    Although pcre_internal.h does itself include pcre.h, we explicitly include it
132    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133    appropriately for an application, not for building PCRE. */
134    
135    #include "pcre.h"
136    
137    #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138    /* Configure internal macros to 32 bit mode. */
139    #define COMPILE_PCRE32
140    #endif
141    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142    /* Configure internal macros to 16 bit mode. */
143    #define COMPILE_PCRE16
144    #endif
145    #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146    /* Configure internal macros to 8 bit mode. */
147    #define COMPILE_PCRE8
148    #endif
149    
150    #include "pcre_internal.h"
151    
152    /* The pcre_printint() function, which prints the internal form of a compiled
153    regex, is held in a separate file so that (a) it can be compiled in either
154    8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155    when that is compiled in debug mode. */
156    
157  /* Debugging function to print the internal form of the regex. This is the same  #ifdef SUPPORT_PCRE8
158  code as contained in pcre.c under the DEBUG macro. */  void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159    #endif
160    #ifdef SUPPORT_PCRE16
161    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162    #endif
163    #ifdef SUPPORT_PCRE32
164    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165    #endif
166    
167  static const char *OP_names[] = {  /* We need access to some of the data tables that PCRE uses. So as not to have
168    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  to keep two copies, we include the source files here, changing the names of the
169    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",  external symbols to prevent clashes. */
170    "not",  
171    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  #define PCRE_INCLUDED
172    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
173    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  #include "pcre_tables.c"
174    "*", "*?", "+", "+?", "?", "??", "{", "{",  #include "pcre_ucd.c"
175    "class", "Ref",  
176    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",  /* The definition of the macro PRINTABLE, which determines whether to print an
177    "Brazero", "Braminzero", "Bra"  output character as-is or as a hex value when showing compiled patterns, is
178  };  the same as in the printint.src file. We use it here in cases when the locale
179    has not been explicitly changed, so as to get consistent output from systems
180    that differ in their output from isprint() even in the "C" locale. */
181    
182    #ifdef EBCDIC
183    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
184    #else
185    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
186    #endif
187    
188  static void print_internals(pcre *re)  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
 {  
 unsigned char *code = ((real_pcre *)re)->code;  
189    
190  printf("------------------------------------------------------------------\n");  /* Posix support is disabled in 16 or 32 bit only mode. */
191    #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192    #define NOPOSIX
193    #endif
194    
195  for(;;)  /* It is possible to compile this test program without including support for
196    {  testing the POSIX interface, though this is not available via the standard
197    int c;  Makefile. */
   int charlength;  
198    
199    printf("%3d ", code - ((real_pcre *)re)->code);  #if !defined NOPOSIX
200    #include "pcreposix.h"
201    #endif
202    
203    if (*code >= OP_BRA)  /* It is also possible, originally for the benefit of a version that was
204      {  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
205      printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
206      code += 2;  automatically cut out the UTF support if PCRE is built without it. */
207      }  
208    #ifndef SUPPORT_UTF
209    #ifndef NOUTF
210    #define NOUTF
211    #endif
212    #endif
213    
214    else switch(*code)  /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215      {  for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
216      case OP_END:  only from one place and is handled differently). I couldn't dream up any way of
217      printf("    %s\n", OP_names[*code]);  using a single macro to do this in a generic way, because of the many different
218      printf("------------------------------------------------------------------\n");  argument requirements. We know that at least one of SUPPORT_PCRE8 and
219      return;  SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
220    use these in the definitions of generic macros.
221    
222      case OP_CHARS:  **** Special note about the PCHARSxxx macros: the address of the string to be
223      charlength = *(++code);  printed is always given as two arguments: a base address followed by an offset.
224      printf("%3d ", charlength);  The base address is cast to the correct data size for 8 or 16 bit data; the
225      while (charlength-- > 0)  offset is in units of this size. If the string were given as base+offset in one
226        if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);  argument, the casting might be incorrectly applied. */
     break;  
227    
228      case OP_KETRMAX:  #ifdef SUPPORT_PCRE8
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ONCE:  
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
229    
230      case OP_STAR:  #define PCHARS8(lv, p, offset, len, f) \
231      case OP_MINSTAR:    lv = pchars((pcre_uint8 *)(p) + offset, len, f)
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       printf("    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) printf("    %c", c);  
       else printf("    \\x%02x", c);  
     printf("%s", OP_names[*code++]);  
     break;  
232    
233      case OP_EXACT:  #define PCHARSV8(p, offset, len, f) \
234      case OP_UPTO:    (void)pchars((pcre_uint8 *)(p) + offset, len, f)
     case OP_MINUPTO:  
     if (isprint(c = code[3])) printf("    %c{", c);  
       else printf("    \\x%02x{", c);  
     if (*code != OP_EXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) printf("?");  
     code += 3;  
     break;  
235    
236      case OP_TYPEEXACT:  #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237      case OP_TYPEUPTO:    p = read_capture_name8(p, cn8, re)
     case OP_TYPEMINUPTO:  
     printf("    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) printf("?");  
     code += 3;  
     break;  
238    
239      case OP_NOT:  #define STRLEN8(p) ((int)strlen((char *)p))
     if (isprint(c = *(++code))) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     break;  
240    
241      case OP_NOTSTAR:  #define SET_PCRE_CALLOUT8(callout) \
242      case OP_NOTMINSTAR:    pcre_callout = callout
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     printf("%s", OP_names[*code++]);  
     break;  
243    
244      case OP_NOTEXACT:  #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
245      case OP_NOTUPTO:     pcre_assign_jit_stack(extra, callback, userdata)
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) printf("    [^%c]{", c);  
       else printf("    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) printf("?");  
     code += 3;  
     break;  
246    
247      case OP_REF:  #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
248      printf("    \\%d", *(++code));    re = pcre_compile((char *)pat, options, error, erroffset, tables)
     code++;  
     goto CLASS_REF_REPEAT;  
249    
250      case OP_CLASS:  #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251        {      namesptr, cbuffer, size) \
252        int i, min, max;    rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
253        (char *)namesptr, cbuffer, size)
254    
255        code++;  #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
256        printf("    [");    rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
257    
258        for (i = 0; i < 256; i++)  #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259          {      offsets, size_offsets, workspace, size_workspace) \
260          if ((code[i/8] & (1 << (i&7))) != 0)    count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
261            {      offsets, size_offsets, workspace, size_workspace)
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') printf("\\");  
           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);  
           if (--j > i)  
             {  
             printf("-");  
             if (j == '-' || j == ']') printf("\\");  
             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       printf("]");  
       code += 32;  
262    
263        CLASS_REF_REPEAT:  #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
264        offsets, size_offsets) \
265      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
266        offsets, size_offsets)
267    
268        switch(*code)  #define PCRE_FREE_STUDY8(extra) \
269          {    pcre_free_study(extra)
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         printf("%s", OP_names[*code]);  
         break;  
270    
271          case OP_CRRANGE:  #define PCRE_FREE_SUBSTRING8(substring) \
272          case OP_CRMINRANGE:    pcre_free_substring(substring)
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) printf("{%d,}", min);  
         else printf("{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) printf("?");  
         code += 4;  
         break;  
273    
274          default:  #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
275          code--;    pcre_free_substring_list(listptr)
         }  
       }  
     break;  
276    
277      /* Anything else is just a one-node item */  #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
278        getnamesptr, subsptr) \
279      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
280        (char *)getnamesptr, subsptr)
281    
282      default:  #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
283      printf("    %s", OP_names[*code]);    n = pcre_get_stringnumber(re, (char *)ptr)
     break;  
     }  
284    
285    code++;  #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
286    printf("\n");    rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
   }  
 }  
287    
288    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
289      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
290    
291    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
292      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
293    
294  /* Character string printing function. */  #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
295      pcre_printint(re, outfile, debug_lengths)
296    
297  static void pchars(unsigned char *p, int length)  #define PCRE_STUDY8(extra, re, options, error) \
298  {    extra = pcre_study(re, options, error)
 int c;  
 while (length-- > 0)  
   if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
     else fprintf(outfile, "\\x%02x", c);  
 }  
299    
300    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
301      pcre_jit_stack_alloc(startsize, maxsize)
302    
303    #define PCRE_JIT_STACK_FREE8(stack) \
304      pcre_jit_stack_free(stack)
305    
306  /* Alternative malloc function, to test functionality and show the size of the  #endif /* SUPPORT_PCRE8 */
 compiled re. */  
307    
308  static void *new_malloc(size_t size)  /* -----------------------------------------------------------*/
 {  
 if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  
 return malloc(size);  
 }  
309    
310    #ifdef SUPPORT_PCRE16
311    
312    #define PCHARS16(lv, p, offset, len, f) \
313      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
314    
315  /* Read lines from named file or stdin and write to named file or stdout; lines  #define PCHARSV16(p, offset, len, f) \
316  consist of a regular expression, in delimiters and optionally followed by    (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
 options, followed by a set of test data, terminated by an empty line. */  
317    
318  int main(int argc, char **argv)  #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319  {    p = read_capture_name16(p, cn16, re)
 FILE *infile = stdin;  
 int options = 0;  
 int study_options = 0;  
 int op = 1;  
 int timeit = 0;  
 int showinfo = 0;  
 int posix = 0;  
 int debug = 0;  
 unsigned char buffer[30000];  
 unsigned char dbuffer[1024];  
320    
321  /* Static so that new_malloc can use it. */  #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
322    
323  outfile = stdout;  #define SET_PCRE_CALLOUT16(callout) \
324      pcre16_callout = (int (*)(pcre16_callout_block *))callout
325    
326  /* Scan options */  #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327      pcre16_assign_jit_stack((pcre16_extra *)extra, \
328        (pcre16_jit_callback)callback, userdata)
329    
330  while (argc > 1 && argv[op][0] == '-')  #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331    {    re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332    if (strcmp(argv[op], "-s") == 0) log_store = 1;      tables)
   else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
   else if (strcmp(argv[op], "-i") == 0) showinfo = 1;  
   else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;  
   else if (strcmp(argv[op], "-p") == 0) posix = 1;  
   else  
     {  
     printf("*** Unknown option %s\n", argv[op]);  
     return 1;  
     }  
   op++;  
   argc--;  
   }  
333    
334  /* Sort out the input and output files */  #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335        namesptr, cbuffer, size) \
336      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338    
339  if (argc > 1)  #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340    {    rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341    infile = fopen(argv[op], "r");      (PCRE_UCHAR16 *)cbuffer, size/2)
   if (infile == NULL)  
     {  
     printf("** Failed to open %s\n", argv[op]);  
     return 1;  
     }  
   }  
342    
343  if (argc > 2)  #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344    {      offsets, size_offsets, workspace, size_workspace) \
345    outfile = fopen(argv[op+1], "w");    count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346    if (outfile == NULL)      (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347      {      workspace, size_workspace)
     printf("** Failed to open %s\n", argv[op+1]);  
     return 1;  
     }  
   }  
348    
349  /* Set alternative malloc function */  #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350        offsets, size_offsets) \
351      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352        len, start_offset, options, offsets, size_offsets)
353    
354  pcre_malloc = new_malloc;  #define PCRE_FREE_STUDY16(extra) \
355      pcre16_free_study((pcre16_extra *)extra)
356    
357  /* Heading line, then prompt for first re if stdin */  #define PCRE_FREE_SUBSTRING16(substring) \
358      pcre16_free_substring((PCRE_SPTR16)substring)
359    
360  fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361  fprintf(outfile, "PCRE version %s\n\n", pcre_version());    pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362    
363  /* Main loop */  #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364        getnamesptr, subsptr) \
365      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367    
368  for (;;)  #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369    {    n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
   pcre *re = NULL;  
   pcre_extra *extra = NULL;  
   regex_t preg;  
   const char *error;  
   unsigned char *p, *pp;  
   int do_study = 0;  
   int do_debug = 0;  
   int do_posix = 0;  
   int erroroffset, len, delimiter;  
370    
371    if (infile == stdin) printf("  re> ");  #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373    if (infile != stdin) fprintf(outfile, (char *)buffer);      (PCRE_SPTR16 *)(void*)subsptr)
374    
375    p = buffer;  #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376    while (isspace(*p)) p++;    rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377    if (*p == 0) continue;      (PCRE_SPTR16 **)(void*)listptr)
378    
379    /* Get the delimiter and seek the end of the pattern; if it isn't  #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380    complete, read more. */    rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381        tables)
382    
383    delimiter = *p++;  #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384      pcre16_printint(re, outfile, debug_lengths)
385    
386    if (isalnum(delimiter))  #define PCRE_STUDY16(extra, re, options, error) \
387      {    extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
     fprintf(outfile, "** Delimiter must not be alphameric\n");  
     goto SKIP_DATA;  
     }  
388    
389    pp = p;  #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391    
392    for(;;)  #define PCRE_JIT_STACK_FREE16(stack) \
393      {    pcre16_jit_stack_free((pcre16_jit_stack *)stack)
     while (*pp != 0 && *pp != delimiter) pp++;  
     if (*pp != 0) break;  
394    
395      len = sizeof(buffer) - (pp - buffer);  #endif /* SUPPORT_PCRE16 */
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
396    
397      if (infile == stdin) printf("    > ");  /* -----------------------------------------------------------*/
     if (fgets((char *)pp, len, infile) == NULL)  
       {  
       fprintf(outfile, "** Unexpected EOF\n");  
       goto END_OFF;  
       }  
     if (infile != stdin) fprintf(outfile, (char *)pp);  
     }  
398    
399    /* Terminate the pattern at the delimiter */  #ifdef SUPPORT_PCRE32
400    
401    *pp++ = 0;  #define PCHARS32(lv, p, offset, len, f) \
402      lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
403    
404    /* Look for options after final delimiter */  #define PCHARSV32(p, offset, len, f) \
405      (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
406    
407    options = 0;  #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408    study_options = 0;    p = read_capture_name32(p, cn32, re)
   while (*pp != 0)  
     {  
     switch (*pp++)  
       {  
       case 'i': options |= PCRE_CASELESS; break;  
       case 'm': options |= PCRE_MULTILINE; break;  
       case 's': options |= PCRE_DOTALL; break;  
       case 'x': options |= PCRE_EXTENDED; break;  
       case 'A': options |= PCRE_ANCHORED; break;  
       case 'D': do_debug = 1; break;  
       case 'E': options |= PCRE_DOLLAR_ENDONLY; break;  
       case 'P': do_posix = 1; break;  
       case 'S': do_study = 1; break;  
       case 'I': study_options |= PCRE_CASELESS; break;  
       case 'X': options |= PCRE_EXTRA; break;  
       case '\n': case ' ': break;  
       default:  
       fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);  
       goto SKIP_DATA;  
       }  
     }  
409    
410    /* Handle compiling via the POSIX interface, which doesn't support the  #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
   timing, showing, or debugging options. */  
411    
412    if (posix || do_posix)  #define SET_PCRE_CALLOUT32(callout) \
413      {    pcre32_callout = (int (*)(pcre32_callout_block *))callout
     int rc;  
     int cflags = 0;  
     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;  
     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;  
     rc = regcomp(&preg, (char *)p, cflags);  
414    
415      /* Compilation failed; go back for another re, skipping to blank line  #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416      if non-interactive. */    pcre32_assign_jit_stack((pcre32_extra *)extra, \
417        (pcre32_jit_callback)callback, userdata)
418    
419      if (rc != 0)  #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420        {    re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));      tables)
       fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);  
       goto SKIP_DATA;  
       }  
     }  
422    
423    /* Handle compiling via the native interface */  #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424        namesptr, cbuffer, size) \
425      rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426        count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/2)
427    
428    else  #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429      {    rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430      if (timeit)      (PCRE_UCHAR32 *)cbuffer, size/2)
       {  
       register int i;  
       clock_t time_taken;  
       clock_t start_time = clock();  
       for (i = 0; i < 4000; i++)  
         {  
         re = pcre_compile((char *)p, options, &error, &erroroffset);  
         if (re != NULL) free(re);  
         }  
       time_taken = clock() - start_time;  
       fprintf(outfile, "Compile time %.2f milliseconds\n",  
         ((double)time_taken)/(4 * CLOCKS_PER_SEC));  
       }  
431    
432      re = pcre_compile((char *)p, options, &error, &erroroffset);  #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433        offsets, size_offsets, workspace, size_workspace) \
434      count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435        (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436        workspace, size_workspace)
437    
438      /* Compilation failed; go back for another re, skipping to blank line  #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439      if non-interactive. */      offsets, size_offsets) \
440      count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441        len, start_offset, options, offsets, size_offsets)
442    
443      if (re == NULL)  #define PCRE_FREE_STUDY32(extra) \
444        {    pcre32_free_study((pcre32_extra *)extra)
       fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);  
       SKIP_DATA:  
       if (infile != stdin)  
         {  
         for (;;)  
           {  
           if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)  
             goto END_OFF;  
           len = (int)strlen((char *)buffer);  
           while (len > 0 && isspace(buffer[len-1])) len--;  
           if (len == 0) break;  
           }  
         fprintf(outfile, "\n");  
         }  
       continue;  
       }  
445    
446      /* Compilation succeeded; print data if required */  #define PCRE_FREE_SUBSTRING32(substring) \
447      pcre32_free_substring((PCRE_SPTR32)substring)
448    
449      if (showinfo || do_debug)  #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450        {    pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
       int first_char, count;  
451    
452        if (debug || do_debug) print_internals(re);  #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453        getnamesptr, subsptr) \
454      rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455        count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456    
457        count = pcre_info(re, &options, &first_char);  #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458        if (count < 0) fprintf(outfile,    n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
         "Error %d while reading info\n", count);  
       else  
         {  
         fprintf(outfile, "Identifying subpattern count = %d\n", count);  
         if (options == 0) fprintf(outfile, "No options\n");  
           else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",  
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "");  
         if (first_char == -1)  
           {  
           fprintf(outfile, "First char at start or follows \\n\n");  
           }  
         else if (first_char < 0)  
           {  
           fprintf(outfile, "No first char\n");  
           }  
         else  
           {  
           if (isprint(first_char))  
             fprintf(outfile, "First char = \'%c\'\n", first_char);  
           else  
             fprintf(outfile, "First char = %d\n", first_char);  
           }  
         }  
       }  
459    
460      /* If /S was present, study the regexp to generate additional info to  #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461      help with the matching. */    rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462        (PCRE_SPTR32 *)(void*)subsptr)
463    
464      if (do_study)  #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465        {    rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466        if (timeit)      (PCRE_SPTR32 **)(void*)listptr)
         {  
         register int i;  
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < 4000; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.2f milliseconds\n",  
           ((double)time_taken)/(4 * CLOCKS_PER_SEC));  
         }  
467    
468        extra = pcre_study(re, study_options, &error);  #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469        if (error != NULL)    rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470          fprintf(outfile, "Failed to study: %s\n", error);      tables)
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
471    
472        /* This looks at internal information. A bit kludgy to do it this  #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473        way, but it is useful for testing. */    pcre32_printint(re, outfile, debug_lengths)
474    
475        else if (showinfo || do_debug)  #define PCRE_STUDY32(extra, re, options, error) \
476          {    extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477          real_pcre_extra *xx = (real_pcre_extra *)extra;  
478          if ((xx->options & PCRE_STUDY_MAPPED) == 0)  #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479            fprintf(outfile, "No starting character set\n");    (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480          else  
481            {  #define PCRE_JIT_STACK_FREE32(stack) \
482            int i;    pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483            int c = 24;  
484            fprintf(outfile, "Starting character set: ");  #endif /* SUPPORT_PCRE32 */
485            for (i = 0; i < 256; i++)  
486              {  
487              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)  /* ----- At least two modes are supported; a runtime test is needed, except for
488                {  pcre_config(), where it doesn't matter which version is called. (The JIT
489                if (c > 75)  stack macros below do test the mode at run time.) ----- */
490                  {  
491                  fprintf(outfile, "\n  ");  enum {
492                  c = 2;    PCRE8_MODE,
493                  }    PCRE16_MODE,
494                if (isprint(i) && i != ' ')    PCRE32_MODE
495                  {  };
496                  fprintf(outfile, "%c ", i);  
497                  c += 2;  #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498                  }  
499                else  #define CHAR_SIZE (1 << pcre_mode)
500    
501    #define PCHARS(lv, p, offset, len, f) \
502      if (pcre_mode == PCRE32_MODE) \
503        PCHARS32(lv, p, offset, len, f); \
504      else if (pcre_mode == PCRE16_MODE) \
505        PCHARS16(lv, p, offset, len, f); \
506      else \
507        PCHARS8(lv, p, offset, len, f)
508    
509    #define PCHARSV(p, offset, len, f) \
510      if (pcre_mode == PCRE32_MODE) \
511        PCHARSV32(p, offset, len, f); \
512      else if (pcre_mode == PCRE16_MODE) \
513        PCHARSV16(p, offset, len, f); \
514      else \
515        PCHARSV8(p, offset, len, f)
516    
517    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518      if (pcre_mode == PCRE32_MODE) \
519        READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520      else if (pcre_mode == PCRE16_MODE) \
521        READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522      else \
523        READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524    
525    #define SET_PCRE_CALLOUT(callout) \
526      if (pcre_mode == PCRE32_MODE) \
527        SET_PCRE_CALLOUT32(callout); \
528      else if (pcre_mode == PCRE16_MODE) \
529        SET_PCRE_CALLOUT16(callout); \
530      else \
531        SET_PCRE_CALLOUT8(callout)
532    
533    #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534    
535    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536      if (pcre_mode == PCRE32_MODE) \
537        PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538      else if (pcre_mode == PCRE16_MODE) \
539        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540      else \
541        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542    
543    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544      if (pcre_mode == PCRE32_MODE) \
545        PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546      else if (pcre_mode == PCRE16_MODE) \
547        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548      else \
549        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
550    
551    #define PCRE_CONFIG pcre_config
552    
553    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554        namesptr, cbuffer, size) \
555      if (pcre_mode == PCRE32_MODE) \
556        PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557          namesptr, cbuffer, size); \
558      else if (pcre_mode == PCRE16_MODE) \
559        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560          namesptr, cbuffer, size); \
561      else \
562        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
563          namesptr, cbuffer, size)
564    
565    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566      if (pcre_mode == PCRE32_MODE) \
567        PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568      else if (pcre_mode == PCRE16_MODE) \
569        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570      else \
571        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572    
573    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574        offsets, size_offsets, workspace, size_workspace) \
575      if (pcre_mode == PCRE32_MODE) \
576        PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577          offsets, size_offsets, workspace, size_workspace); \
578      else if (pcre_mode == PCRE16_MODE) \
579        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580          offsets, size_offsets, workspace, size_workspace); \
581      else \
582        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
583          offsets, size_offsets, workspace, size_workspace)
584    
585    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586        offsets, size_offsets) \
587      if (pcre_mode == PCRE32_MODE) \
588        PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589          offsets, size_offsets); \
590      else if (pcre_mode == PCRE16_MODE) \
591        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592          offsets, size_offsets); \
593      else \
594        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
595          offsets, size_offsets)
596    
597    #define PCRE_FREE_STUDY(extra) \
598      if (pcre_mode == PCRE32_MODE) \
599        PCRE_FREE_STUDY32(extra); \
600      else if (pcre_mode == PCRE16_MODE) \
601        PCRE_FREE_STUDY16(extra); \
602      else \
603        PCRE_FREE_STUDY8(extra)
604    
605    #define PCRE_FREE_SUBSTRING(substring) \
606      if (pcre_mode == PCRE32_MODE) \
607        PCRE_FREE_SUBSTRING32(substring); \
608      else if (pcre_mode == PCRE16_MODE) \
609        PCRE_FREE_SUBSTRING16(substring); \
610      else \
611        PCRE_FREE_SUBSTRING8(substring)
612    
613    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614      if (pcre_mode == PCRE32_MODE) \
615        PCRE_FREE_SUBSTRING_LIST32(listptr); \
616      else if (pcre_mode == PCRE16_MODE) \
617        PCRE_FREE_SUBSTRING_LIST16(listptr); \
618      else \
619        PCRE_FREE_SUBSTRING_LIST8(listptr)
620    
621    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622        getnamesptr, subsptr) \
623      if (pcre_mode == PCRE32_MODE) \
624        PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625          getnamesptr, subsptr); \
626      else if (pcre_mode == PCRE16_MODE) \
627        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628          getnamesptr, subsptr); \
629      else \
630        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
631          getnamesptr, subsptr)
632    
633    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634      if (pcre_mode == PCRE32_MODE) \
635        PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636      else if (pcre_mode == PCRE16_MODE) \
637        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638      else \
639        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640    
641    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642      if (pcre_mode == PCRE32_MODE) \
643        PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644      else if (pcre_mode == PCRE16_MODE) \
645        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646      else \
647        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648    
649    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650      if (pcre_mode == PCRE32_MODE) \
651        PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652      else if (pcre_mode == PCRE16_MODE) \
653        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654      else \
655        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656    
657    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658      (pcre_mode == PCRE32_MODE ? \
659         PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660        : pcre_mode == PCRE16_MODE ? \
661          PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662          : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663    
664    #define PCRE_JIT_STACK_FREE(stack) \
665      if (pcre_mode == PCRE32_MODE) \
666        PCRE_JIT_STACK_FREE32(stack); \
667      else if (pcre_mode == PCRE16_MODE) \
668        PCRE_JIT_STACK_FREE16(stack); \
669      else \
670        PCRE_JIT_STACK_FREE8(stack)
671    
672    #define PCRE_MAKETABLES \
673      (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674    
675    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676      if (pcre_mode == PCRE32_MODE) \
677        PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678      else if (pcre_mode == PCRE16_MODE) \
679        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680      else \
681        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682    
683    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684      if (pcre_mode == PCRE32_MODE) \
685        PCRE_PRINTINT32(re, outfile, debug_lengths); \
686      else if (pcre_mode == PCRE16_MODE) \
687        PCRE_PRINTINT16(re, outfile, debug_lengths); \
688      else \
689        PCRE_PRINTINT8(re, outfile, debug_lengths)
690    
691    #define PCRE_STUDY(extra, re, options, error) \
692      if (pcre_mode == PCRE32_MODE) \
693        PCRE_STUDY32(extra, re, options, error); \
694      else if (pcre_mode == PCRE16_MODE) \
695        PCRE_STUDY16(extra, re, options, error); \
696      else \
697        PCRE_STUDY8(extra, re, options, error)
698    
699    /* ----- Only 8-bit mode is supported ----- */
700    
701    #elif defined SUPPORT_PCRE8
702    #define CHAR_SIZE                 1
703    #define PCHARS                    PCHARS8
704    #define PCHARSV                   PCHARSV8
705    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
706    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
707    #define STRLEN                    STRLEN8
708    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
709    #define PCRE_COMPILE              PCRE_COMPILE8
710    #define PCRE_CONFIG               pcre_config
711    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
712    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
713    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
714    #define PCRE_EXEC                 PCRE_EXEC8
715    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
716    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
717    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
718    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
719    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
720    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
721    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
722    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
723    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
724    #define PCRE_MAKETABLES           pcre_maketables()
725    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
726    #define PCRE_PRINTINT             PCRE_PRINTINT8
727    #define PCRE_STUDY                PCRE_STUDY8
728    
729    /* ----- Only 16-bit mode is supported ----- */
730    
731    #elif defined SUPPORT_PCRE16
732    #define CHAR_SIZE                 2
733    #define PCHARS                    PCHARS16
734    #define PCHARSV                   PCHARSV16
735    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
736    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
737    #define STRLEN                    STRLEN16
738    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
739    #define PCRE_COMPILE              PCRE_COMPILE16
740    #define PCRE_CONFIG               pcre16_config
741    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
742    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
743    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
744    #define PCRE_EXEC                 PCRE_EXEC16
745    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
746    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
747    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
748    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
749    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
750    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
751    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
752    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
753    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
754    #define PCRE_MAKETABLES           pcre16_maketables()
755    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756    #define PCRE_PRINTINT             PCRE_PRINTINT16
757    #define PCRE_STUDY                PCRE_STUDY16
758    
759    /* ----- Only 32-bit mode is supported ----- */
760    
761    #elif defined SUPPORT_PCRE32
762    #define CHAR_SIZE                 4
763    #define PCHARS                    PCHARS32
764    #define PCHARSV                   PCHARSV32
765    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
766    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
767    #define STRLEN                    STRLEN32
768    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
769    #define PCRE_COMPILE              PCRE_COMPILE32
770    #define PCRE_CONFIG               pcre32_config
771    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
773    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
774    #define PCRE_EXEC                 PCRE_EXEC32
775    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
776    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
777    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
778    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
779    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
780    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
781    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
782    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
783    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
784    #define PCRE_MAKETABLES           pcre32_maketables()
785    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786    #define PCRE_PRINTINT             PCRE_PRINTINT32
787    #define PCRE_STUDY                PCRE_STUDY32
788    
789    #endif
790    
791    /* ----- End of mode-specific function call macros ----- */
792    
793    
794    /* Other parameters */
795    
796    #ifndef CLOCKS_PER_SEC
797    #ifdef CLK_TCK
798    #define CLOCKS_PER_SEC CLK_TCK
799    #else
800    #define CLOCKS_PER_SEC 100
801    #endif
802    #endif
803    
804    #if !defined NODFA
805    #define DFA_WS_DIMENSION 1000
806    #endif
807    
808    /* This is the default loop count for timing. */
809    
810    #define LOOPREPEAT 500000
811    
812    /* Static variables */
813    
814    static FILE *outfile;
815    static int log_store = 0;
816    static int callout_count;
817    static int callout_extra;
818    static int callout_fail_count;
819    static int callout_fail_id;
820    static int debug_lengths;
821    static int first_callout;
822    static int jit_was_used;
823    static int locale_set = 0;
824    static int show_malloc;
825    static int use_utf;
826    static size_t gotten_store;
827    static size_t first_gotten_store = 0;
828    static const unsigned char *last_callout_mark = NULL;
829    
830    /* The buffers grow automatically if very long input lines are encountered. */
831    
832    static int buffer_size = 50000;
833    static pcre_uint8 *buffer = NULL;
834    static pcre_uint8 *dbuffer = NULL;
835    static pcre_uint8 *pbuffer = NULL;
836    
837    /* Another buffer is needed for translation to 16/32-bit character strings. It
838    will be obtained and extended as required. */
839    
840    #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
841    
842    /* We need the table of operator lengths that is used for 16/32-bit compiling, in
843    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
844    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
845    appropriately for the 16/32-bit world. Just as a safety check, make sure that
846    COMPILE_PCRE[16|32] is *not* set. */
847    
848    #ifdef COMPILE_PCRE16
849    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
850    #endif
851    
852    #ifdef COMPILE_PCRE32
853    #error COMPILE_PCRE32 must not be set when compiling pcretest.c
854    #endif
855    
856    #if LINK_SIZE == 2
857    #undef LINK_SIZE
858    #define LINK_SIZE 1
859    #elif LINK_SIZE == 3 || LINK_SIZE == 4
860    #undef LINK_SIZE
861    #define LINK_SIZE 2
862    #else
863    #error LINK_SIZE must be either 2, 3, or 4
864    #endif
865    
866    #undef IMM2_SIZE
867    #define IMM2_SIZE 1
868    
869    #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
870    
871    #ifdef SUPPORT_PCRE16
872    static int buffer16_size = 0;
873    static pcre_uint16 *buffer16 = NULL;
874    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
875    #endif  /* SUPPORT_PCRE16 */
876    
877    #ifdef SUPPORT_PCRE32
878    static int buffer32_size = 0;
879    static pcre_uint32 *buffer32 = NULL;
880    static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
881    #endif  /* SUPPORT_PCRE32 */
882    
883    /* If we have 8-bit support, default to it; if there is also
884    16- or 32-bit support, it can be changed by an option. If there is no 8-bit support,
885    there must be 16- or 32-bit support, so default to that mode (16-bit is preferred). */
886    
887    #if defined SUPPORT_PCRE8
888    static int pcre_mode = PCRE8_MODE;
889    #elif defined SUPPORT_PCRE16
890    static int pcre_mode = PCRE16_MODE;
891    #elif defined SUPPORT_PCRE32
892    static int pcre_mode = PCRE32_MODE;
893    #endif
894    
895    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
896    
897    static int jit_study_bits[] =
898      {
899      PCRE_STUDY_JIT_COMPILE,
900      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
901      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
902      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
903      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
904      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
905      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
906        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
907    };
908    
909    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
910      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
911    
912    /* Textual explanations for runtime error codes */
913    
914    static const char *errtexts[] = {
915      NULL,  /* 0 is no error */
916      NULL,  /* NOMATCH is handled specially */
917      "NULL argument passed",
918      "bad option value",
919      "magic number missing",
920      "unknown opcode - pattern overwritten?",
921      "no more memory",
922      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
923      "match limit exceeded",
924      "callout error code",
925      NULL,  /* BADUTF8/16 is handled specially */
926      NULL,  /* BADUTF8/16 offset is handled specially */
927      NULL,  /* PARTIAL is handled specially */
928      "not used - internal error",
929      "internal error - pattern overwritten?",
930      "bad count value",
931      "item unsupported for DFA matching",
932      "backreference condition or recursion test not supported for DFA matching",
933      "match limit not supported for DFA matching",
934      "workspace size exceeded in DFA matching",
935      "too much recursion for DFA matching",
936      "recursion limit exceeded",
937      "not used - internal error",
938      "invalid combination of newline options",
939      "bad offset value",
940      NULL,  /* SHORTUTF8/16 is handled specially */
941      "nested recursion at the same subject position",
942      "JIT stack limit reached",
943      "pattern compiled in wrong mode: 8-bit/16-bit error",
944      "pattern compiled with other endianness",
945      "invalid data in workspace for DFA restart"
946    };
947    
948    
949    /*************************************************
950    *         Alternate character tables             *
951    *************************************************/
952    
953    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
954    using the default tables of the library. However, the T option can be used to
955    select alternate sets of tables, for different kinds of testing. Note also that
956    the L (locale) option also adjusts the tables. */
957    
958    /* This is the set of tables distributed as default with PCRE. It recognizes
959    only ASCII characters. */
960    
961    static const pcre_uint8 tables0[] = {
962    
963    /* This table is a lower casing table. */
964    
965        0,  1,  2,  3,  4,  5,  6,  7,
966        8,  9, 10, 11, 12, 13, 14, 15,
967       16, 17, 18, 19, 20, 21, 22, 23,
968       24, 25, 26, 27, 28, 29, 30, 31,
969       32, 33, 34, 35, 36, 37, 38, 39,
970       40, 41, 42, 43, 44, 45, 46, 47,
971       48, 49, 50, 51, 52, 53, 54, 55,
972       56, 57, 58, 59, 60, 61, 62, 63,
973       64, 97, 98, 99,100,101,102,103,
974      104,105,106,107,108,109,110,111,
975      112,113,114,115,116,117,118,119,
976      120,121,122, 91, 92, 93, 94, 95,
977       96, 97, 98, 99,100,101,102,103,
978      104,105,106,107,108,109,110,111,
979      112,113,114,115,116,117,118,119,
980      120,121,122,123,124,125,126,127,
981      128,129,130,131,132,133,134,135,
982      136,137,138,139,140,141,142,143,
983      144,145,146,147,148,149,150,151,
984      152,153,154,155,156,157,158,159,
985      160,161,162,163,164,165,166,167,
986      168,169,170,171,172,173,174,175,
987      176,177,178,179,180,181,182,183,
988      184,185,186,187,188,189,190,191,
989      192,193,194,195,196,197,198,199,
990      200,201,202,203,204,205,206,207,
991      208,209,210,211,212,213,214,215,
992      216,217,218,219,220,221,222,223,
993      224,225,226,227,228,229,230,231,
994      232,233,234,235,236,237,238,239,
995      240,241,242,243,244,245,246,247,
996      248,249,250,251,252,253,254,255,
997    
998    /* This table is a case flipping table. */
999    
1000        0,  1,  2,  3,  4,  5,  6,  7,
1001        8,  9, 10, 11, 12, 13, 14, 15,
1002       16, 17, 18, 19, 20, 21, 22, 23,
1003       24, 25, 26, 27, 28, 29, 30, 31,
1004       32, 33, 34, 35, 36, 37, 38, 39,
1005       40, 41, 42, 43, 44, 45, 46, 47,
1006       48, 49, 50, 51, 52, 53, 54, 55,
1007       56, 57, 58, 59, 60, 61, 62, 63,
1008       64, 97, 98, 99,100,101,102,103,
1009      104,105,106,107,108,109,110,111,
1010      112,113,114,115,116,117,118,119,
1011      120,121,122, 91, 92, 93, 94, 95,
1012       96, 65, 66, 67, 68, 69, 70, 71,
1013       72, 73, 74, 75, 76, 77, 78, 79,
1014       80, 81, 82, 83, 84, 85, 86, 87,
1015       88, 89, 90,123,124,125,126,127,
1016      128,129,130,131,132,133,134,135,
1017      136,137,138,139,140,141,142,143,
1018      144,145,146,147,148,149,150,151,
1019      152,153,154,155,156,157,158,159,
1020      160,161,162,163,164,165,166,167,
1021      168,169,170,171,172,173,174,175,
1022      176,177,178,179,180,181,182,183,
1023      184,185,186,187,188,189,190,191,
1024      192,193,194,195,196,197,198,199,
1025      200,201,202,203,204,205,206,207,
1026      208,209,210,211,212,213,214,215,
1027      216,217,218,219,220,221,222,223,
1028      224,225,226,227,228,229,230,231,
1029      232,233,234,235,236,237,238,239,
1030      240,241,242,243,244,245,246,247,
1031      248,249,250,251,252,253,254,255,
1032    
1033    /* This table contains bit maps for various character classes. Each map is 32
1034    bytes long and the bits run from the least significant end of each byte. The
1035    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1036    graph, print, punct, and cntrl. Other classes are built from combinations. */
1037    
1038      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1039      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1040      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1041      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1042    
1043      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1044      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1045      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1046      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1047    
1048      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1049      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1050      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1051      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1052    
1053      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1054      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1055      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1056      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1057    
1058      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1059      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1060      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1061      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1062    
1063      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1064      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1065      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1066      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1067    
1068      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1069      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1070      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1071      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1072    
1073      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1074      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1075      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1076      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1077    
1078      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1079      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1080      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1081      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1082    
1083      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1084      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1085      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1086      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1087    
1088    /* This table identifies various classes of character by individual bits:
1089      0x01   white space character
1090      0x02   letter
1091      0x04   decimal digit
1092      0x08   hexadecimal digit
1093      0x10   alphanumeric or '_'
1094      0x80   regular expression metacharacter or binary zero
1095    */
1096    
1097      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
1098      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
1099      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
1100      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
1101      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
1102      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
1103      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
1104      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
1105      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
1106      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
1107      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
1108      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
1109      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
1110      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
1111      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
1112      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
1113      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1114      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1115      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1116      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1117      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1118      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1119      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1120      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1121      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1122      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1123      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1124      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1125      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1126      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1127      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1128      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1129    
1130    /* This is a set of tables that came originally from a Windows user. It seems to
1131    be at least an approximation of ISO 8859. In particular, there are characters
1132    greater than 128 that are marked as spaces, letters, etc. */
1133    
1134    static const pcre_uint8 tables1[] = {
        /* Entries 0-255: for each character code, the code to use in its place
        when lower-casing. Codes 65-90 map to 97-122 and codes 192-222 (other than
        215) map to 224-254; every other code maps to itself. */
1135    0,1,2,3,4,5,6,7,
1136    8,9,10,11,12,13,14,15,
1137    16,17,18,19,20,21,22,23,
1138    24,25,26,27,28,29,30,31,
1139    32,33,34,35,36,37,38,39,
1140    40,41,42,43,44,45,46,47,
1141    48,49,50,51,52,53,54,55,
1142    56,57,58,59,60,61,62,63,
1143    64,97,98,99,100,101,102,103,
1144    104,105,106,107,108,109,110,111,
1145    112,113,114,115,116,117,118,119,
1146    120,121,122,91,92,93,94,95,
1147    96,97,98,99,100,101,102,103,
1148    104,105,106,107,108,109,110,111,
1149    112,113,114,115,116,117,118,119,
1150    120,121,122,123,124,125,126,127,
1151    128,129,130,131,132,133,134,135,
1152    136,137,138,139,140,141,142,143,
1153    144,145,146,147,148,149,150,151,
1154    152,153,154,155,156,157,158,159,
1155    160,161,162,163,164,165,166,167,
1156    168,169,170,171,172,173,174,175,
1157    176,177,178,179,180,181,182,183,
1158    184,185,186,187,188,189,190,191,
1159    224,225,226,227,228,229,230,231,
1160    232,233,234,235,236,237,238,239,
1161    240,241,242,243,244,245,246,215,
1162    248,249,250,251,252,253,254,223,
1163    224,225,226,227,228,229,230,231,
1164    232,233,234,235,236,237,238,239,
1165    240,241,242,243,244,245,246,247,
1166    248,249,250,251,252,253,254,255,
        /* Entries 256-511: for each character code, the code of the opposite case.
        The pairs 65-90/97-122 and 192-222/224-254 are exchanged, except that the
        entries for 215 and 223 are unchanged while 247 and 255 are exchanged. */
1167    0,1,2,3,4,5,6,7,
1168    8,9,10,11,12,13,14,15,
1169    16,17,18,19,20,21,22,23,
1170    24,25,26,27,28,29,30,31,
1171    32,33,34,35,36,37,38,39,
1172    40,41,42,43,44,45,46,47,
1173    48,49,50,51,52,53,54,55,
1174    56,57,58,59,60,61,62,63,
1175    64,97,98,99,100,101,102,103,
1176    104,105,106,107,108,109,110,111,
1177    112,113,114,115,116,117,118,119,
1178    120,121,122,91,92,93,94,95,
1179    96,65,66,67,68,69,70,71,
1180    72,73,74,75,76,77,78,79,
1181    80,81,82,83,84,85,86,87,
1182    88,89,90,123,124,125,126,127,
1183    128,129,130,131,132,133,134,135,
1184    136,137,138,139,140,141,142,143,
1185    144,145,146,147,148,149,150,151,
1186    152,153,154,155,156,157,158,159,
1187    160,161,162,163,164,165,166,167,
1188    168,169,170,171,172,173,174,175,
1189    176,177,178,179,180,181,182,183,
1190    184,185,186,187,188,189,190,191,
1191    224,225,226,227,228,229,230,231,
1192    232,233,234,235,236,237,238,239,
1193    240,241,242,243,244,245,246,215,
1194    248,249,250,251,252,253,254,223,
1195    192,193,194,195,196,197,198,199,
1196    200,201,202,203,204,205,206,207,
1197    208,209,210,211,212,213,214,247,
1198    216,217,218,219,220,221,222,255,
        /* Entries 512-831 (320 bytes): presumably ten 32-byte bit maps, one bit
        per character code, each describing one class of character - TODO confirm
        against the layout PCRE expects for character tables. */
1199    0,62,0,0,1,0,0,0,
1200    0,0,0,0,0,0,0,0,
1201    32,0,0,0,1,0,0,0,
1202    0,0,0,0,0,0,0,0,
1203    0,0,0,0,0,0,255,3,
1204    126,0,0,0,126,0,0,0,
1205    0,0,0,0,0,0,0,0,
1206    0,0,0,0,0,0,0,0,
1207    0,0,0,0,0,0,255,3,
1208    0,0,0,0,0,0,0,0,
1209    0,0,0,0,0,0,12,2,
1210    0,0,0,0,0,0,0,0,
1211    0,0,0,0,0,0,0,0,
1212    254,255,255,7,0,0,0,0,
1213    0,0,0,0,0,0,0,0,
1214    255,255,127,127,0,0,0,0,
1215    0,0,0,0,0,0,0,0,
1216    0,0,0,0,254,255,255,7,
1217    0,0,0,0,0,4,32,4,
1218    0,0,0,128,255,255,127,255,
1219    0,0,0,0,0,0,255,3,
1220    254,255,255,135,254,255,255,7,
1221    0,0,0,0,0,4,44,6,
1222    255,255,127,255,255,255,127,255,
1223    0,0,0,0,254,255,255,255,
1224    255,255,255,255,255,255,255,127,
1225    0,0,0,0,254,255,255,255,
1226    255,255,255,255,255,255,255,255,
1227    0,2,0,0,255,255,255,255,
1228    255,255,255,255,255,255,255,127,
1229    0,0,0,0,255,255,255,255,
1230    255,255,255,255,255,255,255,255,
1231    0,0,0,0,254,255,0,252,
1232    1,0,0,248,1,0,0,120,
1233    0,0,0,0,254,255,255,255,
1234    0,0,128,0,0,0,128,0,
1235    255,255,255,255,0,0,0,0,
1236    0,0,0,0,0,0,0,128,
1237    255,255,255,255,0,0,0,0,
1238    0,0,0,0,0,0,0,0,
        /* Entries 832-1087: one byte of type flags per character code. The values
        for codes 0-127 are the same as in the table that precedes this one in the
        file, where the bits are described as 1 = white space, 2 = letter,
        4 = decimal digit, 8 = hexadecimal digit, 16 = alphanumeric or '_',
        128 = regular expression metacharacter or binary zero. Unlike that table,
        some codes above 127 have flags set here. */
1239    128,0,0,0,0,0,0,0,
1240    0,1,1,0,1,1,0,0,
1241    0,0,0,0,0,0,0,0,
1242    0,0,0,0,0,0,0,0,
1243    1,0,0,0,128,0,0,0,
1244    128,128,128,128,0,0,128,0,
1245    28,28,28,28,28,28,28,28,
1246    28,28,0,0,0,0,0,128,
1247    0,26,26,26,26,26,26,18,
1248    18,18,18,18,18,18,18,18,
1249    18,18,18,18,18,18,18,18,
1250    18,18,18,128,128,0,128,16,
1251    0,26,26,26,26,26,26,18,
1252    18,18,18,18,18,18,18,18,
1253    18,18,18,18,18,18,18,18,
1254    18,18,18,128,128,0,0,0,
1255    0,0,0,0,0,1,0,0,
1256    0,0,0,0,0,0,0,0,
1257    0,0,0,0,0,0,0,0,
1258    0,0,0,0,0,0,0,0,
1259    1,0,0,0,0,0,0,0,
1260    0,0,18,0,0,0,0,0,
1261    0,0,20,20,0,18,0,0,
1262    0,20,18,0,0,0,0,0,
1263    18,18,18,18,18,18,18,18,
1264    18,18,18,18,18,18,18,18,
1265    18,18,18,18,18,18,18,0,
1266    18,18,18,18,18,18,18,18,
1267    18,18,18,18,18,18,18,18,
1268    18,18,18,18,18,18,18,18,
1269    18,18,18,18,18,18,18,0,
1270    18,18,18,18,18,18,18,18
1271    };
1272    
1273    
1274    
1275    
1276    #ifndef HAVE_STRERROR
1277    /*************************************************
1278    *     Provide strerror() for non-ANSI libraries  *
1279    *************************************************/
1280    
/* Fallback for C libraries that have no strerror() of their own (SunOS4 is one
example). The text is taken from the traditional sys_errlist[] table, which
holds sys_nerr entries.

Argument:
  n          the error number to translate

Returns:     the entry of sys_errlist[] for n, or a fixed "unknown" text when
             n is outside the range 0 to sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1294    #endif /* HAVE_STRERROR */
1295    
1296    
1297    
1298    /*************************************************
1299    *       Print newline configuration              *
1300    *************************************************/
1301    
1302    /*
1303    Arguments:
1304      rc         the return code from PCRE_CONFIG_NEWLINE
1305      isc        TRUE if called from "-C newline"
1306    Returns:     nothing
1307    */
1308    
1309    static void
1310    print_newline_config(int rc, BOOL isc)
1311    {
1312    const char *s = NULL;
1313    if (!isc) printf("  Newline sequence is ");
1314    switch(rc)
1315      {
1316      case CHAR_CR: s = "CR"; break;
1317      case CHAR_LF: s = "LF"; break;
1318      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1319      case -1: s = "ANY"; break;
1320      case -2: s = "ANYCRLF"; break;
1321    
1322      default:
1323      printf("a non-standard value: 0x%04x\n", rc);
1324      return;
1325      }
1326    
1327    printf("%s\n", s);
1328    }
1329    
1330    
1331    
1332    /*************************************************
1333    *         JIT memory callback                    *
1334    *************************************************/
1335    
1336    static pcre_jit_stack* jit_callback(void *arg)
1337    {
1338    jit_was_used = TRUE;
1339    return (pcre_jit_stack *)arg;
1340    }
1341    
1342    
1343    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1344    /*************************************************
1345    *            Convert UTF-8 string to value       *
1346    *************************************************/
1347    
1348    /* This function takes one or more bytes that represents a UTF-8 character,
1349    and returns the value of the character.
1350    
1351    Argument:
1352      utf8bytes   a pointer to the byte vector
1353      vptr        a pointer to an int to receive the value
1354    
1355    Returns:      >  0 => the number of bytes consumed
1356                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1357    */
1358    
1359    static int
1360    utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1361    {
1362    pcre_uint32 c = *utf8bytes++;
1363    pcre_uint32 d = c;
1364    int i, j, s;
1365    
1366    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1367      {
1368      if ((d & 0x80) == 0) break;
1369      d <<= 1;
1370      }
1371    
1372    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1373    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1374    
1375    /* i now has a value in the range 1-5 */
1376    
1377    s = 6*i;
1378    d = (c & utf8_table3[i]) << s;
1379    
1380    for (j = 0; j < i; j++)
1381      {
1382      c = *utf8bytes++;
1383      if ((c & 0xc0) != 0x80) return -(j+1);
1384      s -= 6;
1385      d |= (c & 0x3f) << s;
1386      }
1387    
1388    /* Check that encoding was the correct unique one */
1389    
1390    for (j = 0; j < utf8_table1_size; j++)
1391      if (d <= utf8_table1[j]) break;
1392    if (j != i) return -(i+1);
1393    
1394    /* Valid value */
1395    
1396    *vptr = d;
1397    return i+1;
1398    }
1399    #endif /* NOUTF || SUPPORT_PCRE16 */
1400    
1401    
1402    
1403    #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1404    /*************************************************
1405    *       Convert character value to UTF-8         *
1406    *************************************************/
1407    
1408    /* This function takes an integer value in the range 0 - 0x7fffffff
1409    and encodes it as a UTF-8 character in 0 to 6 bytes.
1410    
1411    Arguments:
1412      cvalue     the character value
1413      utf8bytes  pointer to buffer for result - at least 6 bytes long
1414    
1415    Returns:     number of characters placed in the buffer
1416    */
1417    
1418    static int
1419    ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1420    {
1421    register int i, j;
1422    if (cvalue > 0x7fffffffu)
1423      return -1;
1424    for (i = 0; i < utf8_table1_size; i++)
1425      if (cvalue <= utf8_table1[i]) break;
1426    utf8bytes += i;
1427    for (j = i; j > 0; j--)
1428     {
1429     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1430     cvalue >>= 6;
1431     }
1432    *utf8bytes = utf8_table2[i] | cvalue;
1433    return i + 1;
1434    }
1435    #endif
1436    
1437    
1438    #ifdef SUPPORT_PCRE16
1439    /*************************************************
1440    *         Convert a string to 16-bit             *
1441    *************************************************/
1442    
1443    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1444    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1445    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1446    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1447    result is always left in buffer16.
1448    
1449    Note that this function does not object to surrogate values. This is
1450    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1451    for the purpose of testing that they are correctly faulted.
1452    
1453    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1454    in UTF-8 so that values greater than 255 can be handled.
1455    
1456    Arguments:
1457      data       TRUE if converting a data line; FALSE for a regex
1458      p          points to a byte string
1459      utf        true if UTF-8 (to be converted to UTF-16)
1460      len        number of bytes in the string (excluding trailing zero)
1461    
1462    Returns:     number of 16-bit data items used (excluding trailing zero)
1463                 OR -1 if a UTF-8 string is malformed
1464                 OR -2 if a value > 0x10ffff is encountered
1465                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1466    */
1467    
1468    static int
1469    to16(int data, pcre_uint8 *p, int utf, int len)
1470    {
1471    pcre_uint16 *pp;
1472    
1473    if (buffer16_size < 2*len + 2)
1474      {
1475      if (buffer16 != NULL) free(buffer16);
1476      buffer16_size = 2*len + 2;
1477      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1478      if (buffer16 == NULL)
1479        {
1480        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1481        exit(1);
1482        }
1483      }
1484    
1485    pp = buffer16;
1486    
1487    if (!utf && !data)
1488      {
1489      while (len-- > 0) *pp++ = *p++;
1490      }
1491    
1492    else
1493      {
1494      pcre_uint32 c = 0;
1495      while (len > 0)
1496        {
1497        int chlen = utf82ord(p, &c);
1498        if (chlen <= 0) return -1;
1499        if (c > 0x10ffff) return -2;
1500        p += chlen;
1501        len -= chlen;
1502        if (c < 0x10000) *pp++ = c; else
1503          {
1504          if (!utf) return -3;
1505          c -= 0x10000;
1506          *pp++ = 0xD800 | (c >> 10);
1507          *pp++ = 0xDC00 | (c & 0x3ff);
1508          }
1509        }
1510      }
1511    
1512    *pp = 0;
1513    return pp - buffer16;
1514    }
1515    #endif
1516    
1517    #ifdef SUPPORT_PCRE32
1518    /*************************************************
1519    *         Convert a string to 32-bit             *
1520    *************************************************/
1521    
1522    /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1523    8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1524    times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1525    in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1526    result is always left in buffer32.
1527    
1528    Note that this function does not object to surrogate values. This is
1529    deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1530    for the purpose of testing that they are correctly faulted.
1531    
1532    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1533    in UTF-8 so that values greater than 255 can be handled.
1534    
1535    Arguments:
1536      data       TRUE if converting a data line; FALSE for a regex
1537      p          points to a byte string
1538      utf        true if UTF-8 (to be converted to UTF-32)
1539      len        number of bytes in the string (excluding trailing zero)
1540    
1541    Returns:     number of 32-bit data items used (excluding trailing zero)
1542                 OR -1 if a UTF-8 string is malformed
1543                 OR -2 if a value > 0x10ffff is encountered
1544                 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1545    */
1546    
1547    static int
1548    to32(int data, pcre_uint8 *p, int utf, int len)
1549    {
1550    pcre_uint32 *pp;
1551    
1552    if (buffer32_size < 4*len + 4)
1553      {
1554      if (buffer32 != NULL) free(buffer32);
1555      buffer32_size = 4*len + 4;
1556      buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1557      if (buffer32 == NULL)
1558        {
1559        fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1560        exit(1);
1561        }
1562      }
1563    
1564    pp = buffer32;
1565    
1566    if (!utf && !data)
1567      {
1568      while (len-- > 0) *pp++ = *p++;
1569      }
1570    
1571    else
1572      {
1573      pcre_uint32 c = 0;
1574      while (len > 0)
1575        {
1576        int chlen = utf82ord(p, &c);
1577        if (chlen <= 0) return -1;
1578        if (utf)
1579          {
1580          if (c > 0x10ffff) return -2;
1581          if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1582          }
1583    
1584        p += chlen;
1585        len -= chlen;
1586        *pp++ = c;
1587        }
1588      }
1589    
1590    *pp = 0;
1591    return pp - buffer32;
1592    }
1593    #endif
1594    
1595    /*************************************************
1596    *        Read or extend an input line            *
1597    *************************************************/
1598    
1599    /* Input lines are read into buffer, but both patterns and data lines can be
1600    continued over multiple input lines. In addition, if the buffer fills up, we
1601    want to automatically expand it so as to be able to handle extremely large
1602    lines that are needed for certain stress tests. When the input buffer is
1603    expanded, the other two buffers must also be expanded likewise, and the
1604    contents of pbuffer, which are a copy of the input for callouts, must be
1605    preserved (for when expansion happens for a data line). This is not the most
1606    optimal way of handling this, but hey, this is just a test program!
1607    
1608    Arguments:
1609      f            the file to read
1610      start        where in buffer to start (this *must* be within buffer)
1611      prompt       for stdin or readline()
1612    
1613    Returns:       pointer to the start of new data
1614                   could be a copy of start, or could be moved
1615                   NULL if no data read and EOF reached
1616    */
1617    
1618    static pcre_uint8 *
1619    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1620    {
1621    pcre_uint8 *here = start;
1622    
1623    for (;;)
1624      {
1625      size_t rlen = (size_t)(buffer_size - (here - buffer));
1626    
1627      if (rlen > 1000)
1628        {
1629        int dlen;
1630    
1631        /* If libreadline or libedit support is required, use readline() to read a
1632        line if the input is a terminal. Note that readline() removes the trailing
1633        newline, so we must put it back again, to be compatible with fgets(). */
1634    
1635    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1636        if (isatty(fileno(f)))
1637          {
1638          size_t len;
1639          char *s = readline(prompt);
1640          if (s == NULL) return (here == start)? NULL : start;
1641          len = strlen(s);
1642          if (len > 0) add_history(s);
1643          if (len > rlen - 1) len = rlen - 1;
1644          memcpy(here, s, len);
1645          here[len] = '\n';
1646          here[len+1] = 0;
1647          free(s);
1648          }
1649        else
1650    #endif
1651    
1652        /* Read the next line by normal means, prompting if the file is stdin. */
1653    
1654          {
1655          if (f == stdin) printf("%s", prompt);
1656          if (fgets((char *)here, rlen,  f) == NULL)
1657            return (here == start)? NULL : start;
1658          }
1659    
1660        dlen = (int)strlen((char *)here);
1661        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1662        here += dlen;
1663        }
1664    
1665      else
1666        {
1667        int new_buffer_size = 2*buffer_size;
1668        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1669        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1670        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1671    
1672        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1673          {
1674          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1675          exit(1);
1676          }
1677    
1678        memcpy(new_buffer, buffer, buffer_size);
1679        memcpy(new_pbuffer, pbuffer, buffer_size);
1680    
1681        buffer_size = new_buffer_size;
1682    
1683        start = new_buffer + (start - buffer);
1684        here = new_buffer + (here - buffer);
1685    
1686        free(buffer);
1687        free(dbuffer);
1688        free(pbuffer);
1689    
1690        buffer = new_buffer;
1691        dbuffer = new_dbuffer;
1692        pbuffer = new_pbuffer;
1693        }
1694      }
1695    
1696    return NULL;  /* Control never gets here */
1697    }
1698    
1699    
1700    
1701    /*************************************************
1702    *          Read number from string               *
1703    *************************************************/
1704    
1705    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1706    around with conditional compilation, just do the job by hand. It is only used
1707    for unpicking arguments, so just keep it simple.
1708    
1709    Arguments:
1710      str           string to be converted
1711      endptr        where to put the end pointer
1712    
1713    Returns:        the unsigned long
1714    */
1715    
1716    static int
1717    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1718    {
1719    int result = 0;
1720    while(*str != 0 && isspace(*str)) str++;
1721    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1722    *endptr = str;
1723    return(result);
1724    }
1725    
1726    
1727    
1728    /*************************************************
1729    *             Print one character                *
1730    *************************************************/
1731    
1732    /* Print a single character either literally, or as a hex escape. */
1733    
1734    static int pchar(pcre_uint32 c, FILE *f)
1735    {
1736    int n;
1737    if (PRINTOK(c))
1738      {
1739      if (f != NULL) fprintf(f, "%c", c);
1740      return 1;
1741      }
1742    
1743    if (c < 0x100)
1744      {
1745      if (use_utf)
1746        {
1747        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1748        return 6;
1749        }
1750      else
1751        {
1752        if (f != NULL) fprintf(f, "\\x%02x", c);
1753        return 4;
1754        }
1755      }
1756    
1757    if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1758    return n >= 0 ? n : 0;
1759    }
1760    
1761    
1762    
1763    #ifdef SUPPORT_PCRE8
1764    /*************************************************
1765    *         Print 8-bit character string           *
1766    *************************************************/
1767    
1768    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1769    If handed a NULL file, just counts chars without printing. */
1770    
1771    static int pchars(pcre_uint8 *p, int length, FILE *f)
1772    {
1773    pcre_uint32 c = 0;
1774    int yield = 0;
1775    
1776    if (length < 0)
1777      length = strlen((char *)p);
1778    
1779    while (length-- > 0)
1780      {
1781    #if !defined NOUTF
1782      if (use_utf)
1783        {
1784        int rc = utf82ord(p, &c);
1785        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1786          {
1787          length -= rc - 1;
1788          p += rc;
1789          yield += pchar(c, f);
1790          continue;
1791          }
1792        }
1793    #endif
1794      c = *p++;
1795      yield += pchar(c, f);
1796      }
1797    
1798    return yield;
1799    }
1800    #endif
1801    
1802    
1803    
1804    #ifdef SUPPORT_PCRE16
1805    /*************************************************
1806    *    Find length of 0-terminated 16-bit string   *
1807    *************************************************/
1808    
1809    static int strlen16(PCRE_SPTR16 p)
1810    {
1811    int len = 0;
1812    while (*p++ != 0) len++;
1813    return len;
1814    }
1815    #endif  /* SUPPORT_PCRE16 */
1816    
1817    
1818    
1819    #ifdef SUPPORT_PCRE32
1820    /*************************************************
1821    *    Find length of 0-terminated 32-bit string   *
1822    *************************************************/
1823    
1824    static int strlen32(PCRE_SPTR32 p)
1825    {
1826    int len = 0;
1827    while (*p++ != 0) len++;
1828    return len;
1829    }
1830    #endif  /* SUPPORT_PCRE32 */
1831    
1832    
1833    
1834    #ifdef SUPPORT_PCRE16
1835    /*************************************************
1836    *           Print 16-bit character string        *
1837    *************************************************/
1838    
1839    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1840    If handed a NULL file, just counts chars without printing. */
1841    
1842    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1843    {
1844    int yield = 0;
1845    
1846    if (length < 0)
1847      length = strlen16(p);
1848    
1849    while (length-- > 0)
1850      {
1851      pcre_uint32 c = *p++ & 0xffff;
1852    #if !defined NOUTF
1853      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1854        {
1855        int d = *p & 0xffff;
1856        if (d >= 0xDC00 && d < 0xDFFF)
1857          {
1858          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1859          length--;
1860          p++;
1861          }
1862        }
1863    #endif
1864      yield += pchar(c, f);
1865      }
1866    
1867    return yield;
1868    }
1869    #endif  /* SUPPORT_PCRE16 */
1870    
1871    
1872    
1873    #ifdef SUPPORT_PCRE32
1874    /*************************************************
1875    *           Print 32-bit character string        *
1876    *************************************************/
1877    
1878    /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1879    If handed a NULL file, just counts chars without printing. */
1880    
1881    static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
1882    {
1883    int yield = 0;
1884    
1885    if (length < 0)
1886      length = strlen32(p);
1887    
1888    while (length-- > 0)
1889      {
1890      pcre_uint32 c = *p++;
1891      yield += pchar(c, f);
1892      }
1893    
1894    return yield;
1895    }
1896    #endif  /* SUPPORT_PCRE32 */
1897    
1898    
1899    
1900    #ifdef SUPPORT_PCRE8
1901    /*************************************************
1902    *     Read a capture name (8-bit) and check it   *
1903    *************************************************/
1904    
1905    static pcre_uint8 *
1906    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1907    {
1908    pcre_uint8 *npp = *pp;
1909    while (isalnum(*p)) *npp++ = *p++;
1910    *npp++ = 0;
1911    *npp = 0;
1912    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1913      {
1914      fprintf(outfile, "no parentheses with name \"");
1915      PCHARSV(*pp, 0, -1, outfile);
1916      fprintf(outfile, "\"\n");
1917      }
1918    
1919    *pp = npp;
1920    return p;
1921    }
1922    #endif  /* SUPPORT_PCRE8 */
1923    
1924    
1925    
1926    #ifdef SUPPORT_PCRE16
1927    /*************************************************
1928    *     Read a capture name (16-bit) and check it  *
1929    *************************************************/
1930    
1931    /* Note that the text being read is 8-bit. */
1932    
1933    static pcre_uint8 *
1934    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1935    {
1936    pcre_uint16 *npp = *pp;
1937    while (isalnum(*p)) *npp++ = *p++;
1938    *npp++ = 0;
1939    *npp = 0;
1940    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1941      {
1942      fprintf(outfile, "no parentheses with name \"");
1943      PCHARSV(*pp, 0, -1, outfile);
1944      fprintf(outfile, "\"\n");
1945      }
1946    *pp = npp;
1947    return p;
1948    }
1949    #endif  /* SUPPORT_PCRE16 */
1950    
1951    
1952    
1953    #ifdef SUPPORT_PCRE32
1954    /*************************************************
1955    *     Read a capture name (32-bit) and check it  *
1956    *************************************************/
1957    
1958    /* Note that the text being read is 8-bit. */
1959    
1960    static pcre_uint8 *
1961    read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
1962    {
1963    pcre_uint32 *npp = *pp;
1964    while (isalnum(*p)) *npp++ = *p++;
1965    *npp++ = 0;
1966    *npp = 0;
1967    if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
1968      {
1969      fprintf(outfile, "no parentheses with name \"");
1970      PCHARSV(*pp, 0, -1, outfile);
1971      fprintf(outfile, "\"\n");
1972      }
1973    *pp = npp;
1974    return p;
1975    }
1976    #endif  /* SUPPORT_PCRE32 */
1977    
1978    
1979    
1980    /*************************************************
1981    *              Callout function                  *
1982    *************************************************/
1983    
1984    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1985    the match. Yield zero unless more callouts than the fail count, or the callout
1986    data is not zero. */
1987    
1988    static int callout(pcre_callout_block *cb)
1989    {
1990    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1991    int i, pre_start, post_start, subject_length;
1992    
1993    if (callout_extra)
1994      {
1995      fprintf(f, "Callout %d: last capture = %d\n",
1996        cb->callout_number, cb->capture_last);
1997    
1998      for (i = 0; i < cb->capture_top * 2; i += 2)
1999        {
2000        if (cb->offset_vector[i] < 0)
2001          fprintf(f, "%2d: <unset>\n", i/2);
2002        else
2003          {
2004          fprintf(f, "%2d: ", i/2);
2005          PCHARSV(cb->subject, cb->offset_vector[i],
2006            cb->offset_vector[i+1] - cb->offset_vector[i], f);
2007          fprintf(f, "\n");
2008          }
2009        }
2010      }
2011    
2012    /* Re-print the subject in canonical form, the first time or if giving full
2013    datails. On subsequent calls in the same match, we use pchars just to find the
2014    printed lengths of the substrings. */
2015    
2016    if (f != NULL) fprintf(f, "--->");
2017    
2018    PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2019    PCHARS(post_start, cb->subject, cb->start_match,
2020      cb->current_position - cb->start_match, f);
2021    
2022    PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2023    
2024    PCHARSV(cb->subject, cb->current_position,
2025      cb->subject_length - cb->current_position, f);
2026    
2027    if (f != NULL) fprintf(f, "\n");
2028    
2029    /* Always print appropriate indicators, with callout number if not already
2030    shown. For automatic callouts, show the pattern offset. */
2031    
2032    if (cb->callout_number == 255)
2033      {
2034      fprintf(outfile, "%+3d ", cb->pattern_position);
2035      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
2036      }
2037    else
2038      {
2039      if (callout_extra) fprintf(outfile, "    ");
2040        else fprintf(outfile, "%3d ", cb->callout_number);
2041      }
2042    
2043    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2044    fprintf(outfile, "^");
2045    
2046    if (post_start > 0)
2047      {
2048      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2049      fprintf(outfile, "^");
2050      }
2051    
2052    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2053      fprintf(outfile, " ");
2054    
2055    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2056      pbuffer + cb->pattern_position);
2057    
2058    fprintf(outfile, "\n");
2059    first_callout = 0;
2060    
2061    if (cb->mark != last_callout_mark)
2062      {
2063      if (cb->mark == NULL)
2064        fprintf(outfile, "Latest Mark: <unset>\n");
2065      else
2066        {
2067        fprintf(outfile, "Latest Mark: ");
2068        PCHARSV(cb->mark, 0, -1, outfile);
2069        putc('\n', outfile);
2070        }
2071      last_callout_mark = cb->mark;
2072      }
2073    
2074    if (cb->callout_data != NULL)
2075      {
2076      int callout_data = *((int *)(cb->callout_data));
2077      if (callout_data != 0)
2078        {
2079        fprintf(outfile, "Callout data = %d\n", callout_data);
2080        return callout_data;
2081        }
2082      }
2083    
2084    return (cb->callout_number != callout_fail_id)? 0 :
2085           (++callout_count >= callout_fail_count)? 1 : 0;
2086    }
2087    
2088    
2089    /*************************************************
2090    *            Local malloc functions              *
2091    *************************************************/
2092    
2093    /* Alternative malloc function, to test functionality and save the size of a
2094    compiled re, which is the first store request that pcre_compile() makes. The
2095    show_malloc variable is set only during matching. */
2096    
2097    static void *new_malloc(size_t size)
2098    {
2099    void *block = malloc(size);
2100    gotten_store = size;
2101    if (first_gotten_store == 0) first_gotten_store = size;
2102    if (show_malloc)
2103      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
2104    return block;
2105    }
2106    
2107    static void new_free(void *block)
2108    {
2109    if (show_malloc)
2110      fprintf(outfile, "free             %p\n", block);
2111    free(block);
2112    }
2113    
2114    /* For recursion malloc/free, to test stacking calls */
2115    
2116    static void *stack_malloc(size_t size)
2117    {
2118    void *block = malloc(size);
2119    if (show_malloc)
2120      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2121    return block;
2122    }
2123    
2124    static void stack_free(void *block)
2125    {
2126    if (show_malloc)
2127      fprintf(outfile, "stack_free       %p\n", block);
2128    free(block);
2129    }
2130    
2131    
2132    /*************************************************
2133    *          Call pcre_fullinfo()                  *
2134    *************************************************/
2135    
2136    /* Get one piece of information from the pcre_fullinfo() function. When only
2137    one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2138    value, but the code is defensive.
2139    
2140    Arguments:
2141      re        compiled regex
2142      study     study data
2143      option    PCRE_INFO_xxx option
2144      ptr       where to put the data
2145    
2146    Returns:    0 when OK, < 0 on error
2147    */
2148    
2149    static int
2150    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2151    {
2152    int rc;
2153    
2154    if (pcre_mode == PCRE32_MODE)
2155    #ifdef SUPPORT_PCRE32
2156      rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2157    #else
2158      rc = PCRE_ERROR_BADMODE;
2159    #endif
2160    else if (pcre_mode == PCRE16_MODE)
2161    #ifdef SUPPORT_PCRE16
2162      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2163    #else
2164      rc = PCRE_ERROR_BADMODE;
2165    #endif
2166    else
2167    #ifdef SUPPORT_PCRE8
2168      rc = pcre_fullinfo(re, study, option, ptr);
2169    #else
2170      rc = PCRE_ERROR_BADMODE;
2171    #endif
2172    
2173    if (rc < 0)
2174      {
2175      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2176        pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2177      if (rc == PCRE_ERROR_BADMODE)
2178        fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2179          "%d-bit mode\n", 8 * CHAR_SIZE,
2180          8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2181      }
2182    
2183    return rc;
2184    }
2185    
2186    
2187    
2188    /*************************************************
2189    *             Swap byte functions                *
2190    *************************************************/
2191    
2192    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2193    value, respectively.
2194    
2195    Arguments:
2196      value        any number
2197    
2198    Returns:       the byte swapped value
2199    */
2200    
2201    static pcre_uint32
2202    swap_uint32(pcre_uint32 value)
2203    {
2204    return ((value & 0x000000ff) << 24) |
2205           ((value & 0x0000ff00) <<  8) |
2206           ((value & 0x00ff0000) >>  8) |
2207           (value >> 24);
2208    }
2209    
2210    static pcre_uint16
2211    swap_uint16(pcre_uint16 value)
2212    {
2213    return (value >> 8) | (value << 8);
2214    }
2215    
2216    
2217    
2218    /*************************************************
2219    *        Flip bytes in a compiled pattern        *
2220    *************************************************/
2221    
2222    /* This function is called if the 'F' option was present on a pattern that is
2223    to be written to a file. We flip the bytes of all the integer fields in the
2224    regex data block and the study block. In 16-bit mode this also flips relevant
2225    bytes in the pattern itself. This is to make it possible to test PCRE's
2226    ability to reload byte-flipped patterns, e.g. those compiled on a different
2227    architecture. */
2228    
#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
/* Byte-flip a compiled 8-bit or 16-bit pattern in place.

The integer fields of the regex header, and of the study block when an extra
block is supplied, are always flipped. When 16-bit support is compiled in and
pcre_mode is PCRE16_MODE, the name table and every 16-bit unit of the compiled
code are flipped as well; class bit maps are skipped because they are byte
data.

Arguments:
  ere       the compiled pattern (modified in place)
  extra     the extra block from studying, or NULL

Returns:    nothing
*/

static void
regexflip8_or_16(pcre *ere, pcre_extra *extra)
{
real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
#ifdef SUPPORT_PCRE16
int op;
/* These are computed from the header while it is still in native order; the
walk starts at the name table and runs on into the compiled code. */
pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
BOOL utf = (re->options & PCRE_UTF16) != 0;
BOOL utf16_char = FALSE;
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_PCRE16 */

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
in the name table, if present, and then in the pattern itself. */

#ifdef SUPPORT_PCRE16
if (pcre_mode != PCRE16_MODE) return;

while(TRUE)
  {
  /* Swap the units left over from the previous opcode (or, on the first pass,
  the whole name table). */
  while (length-- > 0)
    {
    *ptr = swap_uint16(*ptr);
    ptr++;
    }
#ifdef SUPPORT_UTF
  if (utf16_char)
    {
    /* The previous opcode ended with a character. ptr[-1] has just been
    byte-swapped, so swap it back to recover the native value before testing
    whether it is a lead surrogate; testing the swapped value would miss real
    surrogate pairs and misfire on characters such as 0x00d8. */
    if ((swap_uint16(ptr[-1]) & 0xfc00) == 0xd800)
      {
      /* We know that there is only one extra character in UTF-16. */
      *ptr = swap_uint16(*ptr);
      ptr++;
      }
    }
  utf16_char = FALSE;
#endif /* SUPPORT_UTF */

  /* Get next opcode. It is read in native order before being flipped. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint16(op);

  switch (op)
    {
    case OP_END:
    return;

#ifdef SUPPORT_UTF
    /* Opcodes whose final unit is a character, which in UTF-16 mode may be
    followed by a trail surrogate. */
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    if (utf) utf16_char = TRUE;
#endif
    /* Fall through. */

    default:
    /* The remaining units of this opcode are swapped at the top of the next
    iteration. */
    length = OP_lengths16[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint16);
    length = 0;
    break;

    case OP_XCLASS:
    /* The item's total length is read in native order before it is flipped.
    LINK_SIZE can be 1 or 2 in 16 bit mode. */
    if (LINK_SIZE > 1)
      length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
        - (1 + LINK_SIZE + 1));
    else
      length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint16(*ptr);
    ptr++;
    if (LINK_SIZE > 1)
      {
      *ptr = swap_uint16(*ptr);
      ptr++;
      }

    /* Flip the XCLASS flags unit, keeping the native value for the test. */
    op = *ptr;
    *ptr = swap_uint16(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint16);
      length -= 32/sizeof(pcre_uint16);
      }
    break;
    }
  }
/* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
#endif /* SUPPORT_PCRE[8|16] */
2409    
2410    
2411    
#if defined SUPPORT_PCRE32
/* Byte-flip a compiled 32-bit pattern in place.

The integer fields of the regex header, and of the study block when an extra
block is supplied, are flipped, followed by the name table and every 32-bit
unit of the compiled code. Class bit maps are skipped because they are byte
data.

Arguments:
  ere       the compiled pattern (modified in place)
  extra     the extra block from studying, or NULL

Returns:    nothing
*/

static void
regexflip_32(pcre *ere, pcre_extra *extra)
{
real_pcre32 *re = (real_pcre32 *)ere;
int op;
/* These are computed from the header while it is still in native order; the
walk starts at the name table and runs on into the compiled code. */
pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 32-bit mode we must swap bytes in the name table, if present, and then
in the pattern itself. */

while(TRUE)
  {
  /* Swap the units left over from the previous opcode (or, on the first pass,
  the whole name table). */
  while (length-- > 0)
    {
    *ptr = swap_uint32(*ptr);
    ptr++;
    }

  /* Get next opcode. It is read in native order before being flipped. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint32(op);

  switch (op)
    {
    case OP_END:
    return;

    default:
    /* The remaining units of this opcode are swapped at the top of the next
    iteration. */
    length = OP_lengths32[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint32);
    length = 0;
    break;

    case OP_XCLASS:
    /* The item's total length is read in native order before it is flipped.
    LINK_SIZE can only be 1 in 32-bit mode. */
    length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint32(*ptr);
    ptr++;

    /* Flip the XCLASS flags unit, keeping the native value for the test. */
    op = *ptr;
    *ptr = swap_uint32(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint32);
      length -= 32/sizeof(pcre_uint32);
      }
    break;
    }
  }
/* Control should never reach here in 32 bit mode. */
}

#endif /* SUPPORT_PCRE32 */
2504    
2505    
2506    
2507    static void
2508    regexflip(pcre *ere, pcre_extra *extra)
2509    {
2510    #if defined SUPPORT_PCRE32
2511      if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2512        regexflip_32(ere, extra);
2513    #endif
2514    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2515      if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2516        regexflip8_or_16(ere, extra);
2517    #endif
2518    }
2519    
2520    
2521    
2522    /*************************************************
2523    *        Check match or recursion limit          *
2524    *************************************************/
2525    
2526    static int
2527    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2528      int start_offset, int options, int *use_offsets, int use_size_offsets,
2529      int flag, unsigned long int *limit, int errnumber, const char *msg)
2530    {
2531    int count;
2532    int min = 0;
2533    int mid = 64;
2534    int max = -1;
2535    
2536    extra->flags |= flag;
2537    
2538    for (;;)
2539      {
2540      *limit = mid;
2541    
2542      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2543        use_offsets, use_size_offsets);
2544    
2545      if (count == errnumber)
2546        {
2547        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2548        min = mid;
2549        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2550        }
2551    
2552      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2553                             count == PCRE_ERROR_PARTIAL)
2554        {
2555        if (mid == min + 1)
2556          {
2557          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2558          break;
2559          }
2560        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2561        max = mid;
2562        mid = (min + mid)/2;
2563        }
2564      else break;    /* Some other error */
2565      }
2566    
2567    extra->flags &= ~flag;
2568    return count;
2569    }
2570    
2571    
2572    
2573    /*************************************************
2574    *         Case-independent strncmp() function    *
2575    *************************************************/
2576    
2577    /*
2578    Arguments:
2579      s         first string
2580      t         second string
2581      n         number of characters to compare
2582    
2583    Returns:    < 0, = 0, or > 0, according to the comparison
2584    */
2585    
2586    static int
2587    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2588    {
2589    while (n--)
2590      {
2591      int c = tolower(*s++) - tolower(*t++);
2592      if (c) return c;
2593      }
2594    return 0;
2595    }
2596    
2597    
2598    
2599    /*************************************************
2600    *         Check newline indicator                *
2601    *************************************************/
2602    
2603    /* This is used both at compile and run-time to check for <xxx> escapes. Print
2604    a message and return 0 if there is no match.
2605    
2606    Arguments:
2607      p           points after the leading '<'
2608      f           file for error message
2609    
2610    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2611    */
2612    
2613    static int
2614    check_newline(pcre_uint8 *p, FILE *f)
2615    {
2616    if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2617    if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2618    if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2619    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2620    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2621    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2622    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2623    fprintf(f, "Unknown newline type at: <%s\n", p);
2624    return 0;
2625    }
2626    
2627    
2628    
2629    /*************************************************
2630    *             Usage function                     *
2631    *************************************************/
2632    
/* Print the command-line help on stdout. Options that depend on how the
program was built (16-bit and 32-bit libraries, DFA matching, the POSIX
interface, readline) are listed only when the corresponding support is
compiled in. The list of -C arguments includes "ebcdic" and "ebcdic-nl",
which the option scanner in main() accepts. */

static void
usage(void)
{
printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE16
printf("  -16      use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf("  -32      use the 32-bit library\n");
#endif
printf("  -b       show compiled code\n");
printf("  -C       show PCRE compile-time options and exit\n");
printf("  -C arg   show a specific compile-time option\n");
printf("           and exit with its value. The arg can be:\n");
printf("     linksize     internal link size [2, 3, 4]\n");
printf("     pcre8        8 bit library support enabled [0, 1]\n");
printf("     pcre16       16 bit library support enabled [0, 1]\n");
printf("     pcre32       32 bit library support enabled [0, 1]\n");
printf("     utf          Unicode Transformation Format supported [0, 1]\n");
printf("     ucp          Unicode Properties supported [0, 1]\n");
printf("     jit          Just-in-time compiler supported [0, 1]\n");
printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
printf("     ebcdic       EBCDIC code support enabled [0, 1]\n");
printf("     ebcdic-nl    code for LF in an EBCDIC build (hex), otherwise 0\n");
printf("  -d       debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf("  -dfa     force DFA matching for all subjects\n");
#endif
printf("  -help    show usage information\n");
printf("  -i       show information about compiled patterns\n"
       "  -M       find MATCH_LIMIT minimum for each subject\n"
       "  -m       output memory used information\n"
       "  -o <n>   set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf("  -p       use POSIX interface\n");
#endif
printf("  -q       quiet: do not output PCRE version number at start\n");
printf("  -S <n>   set stack size to <n> megabytes\n");
printf("  -s       force each pattern to be studied at basic level\n"
       "  -s+      force each pattern to be studied, using JIT if available\n"
       "  -s++     ditto, verifying when JIT was actually used\n"
       "  -s+n     force each pattern to be studied, using JIT if available,\n"
       "             where 1 <= n <= 7 selects JIT options\n"
       "  -s++n    ditto, verifying when JIT was actually used\n"
       "  -t       time compilation and execution\n");
printf("  -t <n>   time compilation and execution, repeating <n> times\n");
printf("  -tm      time execution (matching) only\n");
printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
}
2687    
2688    
2689    
2690    /*************************************************
2691    *                Main Program                    *
2692    *************************************************/
2693    
2694    /* Read lines from named file or stdin and write to named file or stdout; lines
2695    consist of a regular expression, in delimiters and optionally followed by
2696    options, followed by a set of test data, terminated by an empty line. */
2697    
2698    int main(int argc, char **argv)
2699    {
2700    FILE *infile = stdin;
2701    const char *version;
2702    int options = 0;
2703    int study_options = 0;
2704    int default_find_match_limit = FALSE;
2705    int op = 1;
2706    int timeit = 0;
2707    int timeitm = 0;
2708    int showinfo = 0;
2709    int showstore = 0;
2710    int force_study = -1;
2711    int force_study_options = 0;
2712    int quiet = 0;
2713    int size_offsets = 45;
2714    int size_offsets_max;
2715    int *offsets = NULL;
2716    int debug = 0;
2717    int done = 0;
2718    int all_use_dfa = 0;
2719    int verify_jit = 0;
2720    int yield = 0;
2721    int stack_size;
2722    
2723    #if !defined NOPOSIX
2724    int posix = 0;
2725    #endif
2726    #if !defined NODFA
2727    int *dfa_workspace = NULL;
2728    #endif
2729    
2730    pcre_jit_stack *jit_stack = NULL;
2731    
2732    /* These vectors store, end-to-end, a list of zero-terminated captured
2733    substring names, each list itself being terminated by an empty name. Assume
2734    that 1024 is plenty long enough for the few names we'll be testing. It is
2735    easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2736    for the actual memory, to ensure alignment. */
2737    
2738    pcre_uint32 copynames[1024];
2739    pcre_uint32 getnames[1024];
2740    
2741    #ifdef SUPPORT_PCRE32
2742    pcre_uint32 *cn32ptr;
2743    pcre_uint32 *gn32ptr;
2744    #endif
2745    
2746    #ifdef SUPPORT_PCRE16
2747    pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2748    pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2749    pcre_uint16 *cn16ptr;
2750    pcre_uint16 *gn16ptr;
2751    #endif
2752    
2753    #ifdef SUPPORT_PCRE8
2754    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2755    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2756    pcre_uint8 *cn8ptr;
2757    pcre_uint8 *gn8ptr;
2758    #endif
2759    
2760    /* Get buffers from malloc() so that valgrind will check their misuse when
2761    debugging. They grow automatically when very long lines are read. The 16-
2762    and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2763    
2764    buffer = (pcre_uint8 *)malloc(buffer_size);
2765    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2766    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2767    
2768    /* The outfile variable is static so that new_malloc can use it. */
2769    
2770    outfile = stdout;
2771    
2772    /* The following  _setmode() stuff is some Windows magic that tells its runtime
2773    library to translate CRLF into a single LF character. At least, that's what
2774    I've been told: never having used Windows I take this all on trust. Originally
2775    it set 0x8000, but then I was advised that _O_BINARY was better. */
2776    
2777    #if defined(_WIN32) || defined(WIN32)
2778    _setmode( _fileno( stdout ), _O_BINARY );
2779    #endif
2780    
2781    /* Get the version number: both pcre_version() and pcre16_version() give the
2782    same answer. We just need to ensure that we call one that is available. */
2783    
2784    #if defined SUPPORT_PCRE8
2785    version = pcre_version();
2786    #elif defined SUPPORT_PCRE16
2787    version = pcre16_version();
2788    #elif defined SUPPORT_PCRE32
2789    version = pcre32_version();
2790    #endif
2791    
2792    /* Scan options */
2793    
2794    while (argc > 1 && argv[op][0] == '-')
2795      {
2796      pcre_uint8 *endptr;
2797      char *arg = argv[op];
2798    
2799      if (strcmp(arg, "-m") == 0) showstore = 1;
2800      else if (strcmp(arg, "-s") == 0) force_study = 0;
2801    
2802      else if (strncmp(arg, "-s+", 3) == 0)
2803        {
2804        arg += 3;
2805        if (*arg == '+') { arg++; verify_jit = TRUE; }
2806        force_study = 1;
2807        if (*arg == 0)
2808          force_study_options = jit_study_bits[6];
2809        else if (*arg >= '1' && *arg <= '7')
2810          force_study_options = jit_study_bits[*arg - '1'];
2811        else goto BAD_ARG;
2812        }
2813      else if (strcmp(arg, "-16") == 0)
2814        {
2815    #ifdef SUPPORT_PCRE16
2816        pcre_mode = PCRE16_MODE;
2817    #else
2818        printf("** This version of PCRE was built without 16-bit support\n");
2819        exit(1);
2820    #endif
2821        }
2822      else if (strcmp(arg, "-32") == 0)
2823        {
2824    #ifdef SUPPORT_PCRE32
2825        pcre_mode = PCRE32_MODE;
2826    #else
2827        printf("** This version of PCRE was built without 32-bit support\n");
2828        exit(1);
2829    #endif
2830        }
2831      else if (strcmp(arg, "-q") == 0) quiet = 1;
2832      else if (strcmp(arg, "-b") == 0) debug = 1;
2833      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2834      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2835      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2836    #if !defined NODFA
2837      else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2838    #endif
2839      else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2840          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2841            *endptr == 0))
2842        {
2843        op++;
2844        argc--;
2845        }
2846      else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2847        {
2848        int both = arg[2] == 0;
2849        int temp;
2850        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2851                         *endptr == 0))
2852          {
2853          timeitm = temp;
2854          op++;
2855          argc--;
2856          }
2857        else timeitm = LOOPREPEAT;
2858        if (both) timeit = timeitm;
2859        }
2860      else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2861          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2862            *endptr == 0))
2863        {
2864    #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2865        printf("PCRE: -S not supported on this OS\n");
2866        exit(1);
2867    #else
2868        int rc;
2869        struct rlimit rlim;
2870        getrlimit(RLIMIT_STACK, &rlim);
2871        rlim.rlim_cur = stack_size * 1024 * 1024;
2872        rc = setrlimit(RLIMIT_STACK, &rlim);
2873        if (rc != 0)
2874          {
2875        printf("PCRE: setrlimit() failed with error %d\n", rc);
2876        exit(1);
2877          }
2878        op++;
2879        argc--;
2880    #endif
2881        }
2882    #if !defined NOPOSIX
2883      else if (strcmp(arg, "-p") == 0) posix = 1;
2884    #endif
2885      else if (strcmp(arg, "-C") == 0)
2886        {
2887        int rc;
2888        unsigned long int lrc;
2889    
2890        if (argc > 2)
2891          {
2892          if (strcmp(argv[op + 1], "linksize") == 0)
2893            {
2894            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2895            printf("%d\n", rc);
2896            yield = rc;
2897            }
2898          else if (strcmp(argv[op + 1], "pcre8") == 0)
2899            {
2900    #ifdef SUPPORT_PCRE8
2901            printf("1\n");
2902            yield = 1;
2903    #else
2904            printf("0\n");
2905            yield = 0;
2906    #endif
2907            }
2908          else if (strcmp(argv[op + 1], "pcre16") == 0)
2909            {
2910    #ifdef SUPPORT_PCRE16
2911            printf("1\n");
2912            yield = 1;
2913    #else
2914            printf("0\n");
2915            yield = 0;
2916    #endif
2917            }
2918          else if (strcmp(argv[op + 1], "pcre32") == 0)
2919            {
2920    #ifdef SUPPORT_PCRE32
2921            printf("1\n");
2922            yield = 1;
2923    #else
2924            printf("0\n");
2925            yield = 0;
2926    #endif
2927            goto EXIT;
2928            }
2929          if (strcmp(argv[op + 1], "utf") == 0)
2930            {
2931    #ifdef SUPPORT_PCRE8
2932            if (pcre_mode == PCRE8_MODE)
2933              (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2934    #endif
2935    #ifdef SUPPORT_PCRE16
2936            if (pcre_mode == PCRE16_MODE)
2937              (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2938    #endif
2939    #ifdef SUPPORT_PCRE32
2940            if (pcre_mode == PCRE32_MODE)
2941              (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2942    #endif
2943            printf("%d\n", rc);
2944            yield = rc;
2945            goto EXIT;
2946            }
2947          else if (strcmp(argv[op + 1], "ucp") == 0)
2948            {
2949            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2950            printf("%d\n", rc);
2951            yield = rc;
2952            }
2953          else if (strcmp(argv[op + 1], "jit") == 0)
2954            {
2955            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2956            printf("%d\n", rc);
2957            yield = rc;
2958            }
2959          else if (strcmp(argv[op + 1], "newline") == 0)
2960            {
2961            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2962            print_newline_config(rc, TRUE);
2963            }
2964          else if (strcmp(argv[op + 1], "ebcdic") == 0)
2965            {
2966    #ifdef EBCDIC
2967            printf("1\n");
2968            yield = 1;
2969    #else
2970            printf("0\n");
2971    #endif
2972            }
2973          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2974            {
2975    #ifdef EBCDIC
2976            printf("0x%02x\n", CHAR_LF);
2977    #else
2978            printf("0\n");
2979    #endif
2980            }
2981          else
2982            {
2983            printf("Unknown -C option: %s\n", argv[op + 1]);
2984            }
2985          goto EXIT;
2986          }
2987    
2988        /* No argument for -C: output all configuration information. */
2989    
2990        printf("PCRE version %s\n", version);
2991        printf("Compiled with\n");
2992    
2993    #ifdef EBCDIC
2994        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
2995    #endif
2996    
2997    /* At least one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 will be
2998    set. If more than one is set, either all their UTFs are supported or none is. */
2999    
3000    #ifdef SUPPORT_PCRE8
3001        printf("  8-bit support\n");
3002        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3003          printf ("  %sUTF-8 support\n", rc ? "" : "No ");
3004    #endif
3005    #ifdef SUPPORT_PCRE16
3006        printf("  16-bit support\n");
3007        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3008        printf ("  %sUTF-16 support\n", rc ? "" : "No ");
3009    #endif
3010    #ifdef SUPPORT_PCRE32
3011        printf("  32-bit support\n");
3012        (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3013        printf ("  %sUTF-32 support\n", rc ? "" : "No ");
3014    #endif
3015    
3016        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3017        printf("  %sUnicode properties support\n", rc? "" : "No ");
3018        (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3019        if (rc)
3020          {
3021          const char *arch;
3022          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3023          printf("  Just-in-time compiler support: %s\n", arch);
3024          }
3025        else
3026          printf("  No just-in-time compiler support\n");
3027        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3028        print_newline_config(rc, FALSE);
3029        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3030        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3031                                         "all Unicode newlines");
3032        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3033        printf("  Internal link size = %d\n", rc);
3034        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3035        printf("  POSIX malloc threshold = %d\n", rc);
3036        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3037        printf("  Default match limit = %ld\n", lrc);
3038        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3039        printf("  Default recursion depth limit = %ld\n", lrc);
3040        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3041        printf("  Match recursion uses %s", rc? "stack" : "heap");
3042        if (showstore)
3043          {
3044          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3045          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3046          }
3047        printf("\n");
3048        goto EXIT;
3049        }
3050      else if (strcmp(arg, "-help") == 0 ||
3051               strcmp(arg, "--help") == 0)
3052        {
3053        usage();
3054        goto EXIT;
3055        }
3056      else
3057        {
3058        BAD_ARG:
3059        printf("** Unknown or malformed option %s\n", arg);
3060        usage();
3061        yield = 1;
3062        goto EXIT;
3063        }
3064      op++;
3065      argc--;
3066      }
3067    
3068    /* Get the store for the offsets vector, and remember what it was */
3069    
3070    size_offsets_max = size_offsets;
3071    offsets = (int *)malloc(size_offsets_max * sizeof(int));
3072    if (offsets == NULL)
3073      {
3074      printf("** Failed to get %d bytes of memory for offsets vector\n",
3075        (int)(size_offsets_max * sizeof(int)));
3076      yield = 1;
3077      goto EXIT;
3078      }
3079    
3080    /* Sort out the input and output files */
3081    
3082    if (argc > 1)
3083      {
3084      infile = fopen(argv[op], INPUT_MODE);
3085      if (infile == NULL)
3086        {
3087        printf("** Failed to open %s\n", argv[op]);
3088        yield = 1;
3089        goto EXIT;
3090        }
3091      }
3092    
3093    if (argc > 2)
3094      {
3095      outfile = fopen(argv[op+1], OUTPUT_MODE);
3096      if (outfile == NULL)
3097        {
3098        printf("** Failed to open %s\n", argv[op+1]);
3099        yield = 1;
3100        goto EXIT;
3101        }
3102      }
3103    
3104    /* Set alternative malloc function */
3105    
3106    #ifdef SUPPORT_PCRE8
3107    pcre_malloc = new_malloc;
3108    pcre_free = new_free;
3109    pcre_stack_malloc = stack_malloc;
3110    pcre_stack_free = stack_free;
3111    #endif
3112    
3113    #ifdef SUPPORT_PCRE16
3114    pcre16_malloc = new_malloc;
3115    pcre16_free = new_free;
3116    pcre16_stack_malloc = stack_malloc;
3117    pcre16_stack_free = stack_free;
3118    #endif
3119    
3120    #ifdef SUPPORT_PCRE32
3121    pcre32_malloc = new_malloc;
3122    pcre32_free = new_free;
3123    pcre32_stack_malloc = stack_malloc;
3124    pcre32_stack_free = stack_free;
3125    #endif
3126    
3127    /* Heading line unless quiet, then prompt for first regex if stdin */
3128    
3129    if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3130    
3131    /* Main loop */
3132    
3133    while (!done)
3134      {
3135      pcre *re = NULL;
3136      pcre_extra *extra = NULL;
3137    
3138    #if !defined NOPOSIX  /* There are still compilers that require no indent */
3139      regex_t preg;
3140      int do_posix = 0;
3141    #endif
3142    
3143      const char *error;
3144      pcre_uint8 *markptr;
3145      pcre_uint8 *p, *pp, *ppp;
3146      pcre_uint8 *to_file = NULL;
3147      const pcre_uint8 *tables = NULL;
3148      unsigned long int get_options;
3149      unsigned long int true_size, true_study_size = 0;
3150      size_t size, regex_gotten_store;
3151      int do_allcaps = 0;
3152      int do_mark = 0;
3153      int do_study = 0;
3154      int no_force_study = 0;
3155      int do_debug = debug;
3156      int do_G = 0;
3157      int do_g = 0;
3158      int do_showinfo = showinfo;
3159      int do_showrest = 0;
3160      int do_showcaprest = 0;
3161      int do_flip = 0;
3162      int erroroffset, len, delimiter, poffset;
3163    
3164    #if !defined NODFA
3165      int dfa_matched = 0;
3166    #endif
3167    
3168      use_utf = 0;
3169      debug_lengths = 1;
3170    
3171      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
3172      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3173      fflush(outfile);
3174    
3175      p = buffer;
3176      while (isspace(*p)) p++;
3177      if (*p == 0) continue;
3178    
3179      /* See if the pattern is to be loaded pre-compiled from a file. */
3180    
3181      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3182        {
3183        pcre_uint32 magic;
3184        pcre_uint8 sbuf[8];
3185        FILE *f;
3186    
3187        p++;
3188        if (*p == '!')
3189          {
3190          do_debug = TRUE;
3191          do_showinfo = TRUE;
3192          p++;
3193          }
3194    
3195        pp = p + (int)strlen((char *)p);
3196        while (isspace(pp[-1])) pp--;
3197        *pp = 0;
3198    
3199        f = fopen((char *)p, "rb");
3200        if (f == NULL)
3201          {
3202          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3203          continue;
3204          }
3205    
3206        first_gotten_store = 0;
3207        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3208    
3209        true_size =
3210          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3211        true_study_size =
3212          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3213    
3214        re = (pcre *)new_malloc(true_size);
3215        if (re == NULL)
3216          {
3217          printf("** Failed to get %d bytes of memory for pcre object\n",
3218            (int)true_size);
3219          yield = 1;
3220          goto EXIT;
3221          }
3222        regex_gotten_store = first_gotten_store;
3223    
3224        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3225    
3226        magic = REAL_PCRE_MAGIC(re);
3227        if (magic != MAGIC_NUMBER)
3228          {
3229          if (swap_uint32(magic) == MAGIC_NUMBER)
3230            {
3231            do_flip = 1;
3232            }
3233          else
3234            {
3235            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3236            new_free(re);
3237            fclose(f);
3238            continue;
3239            }
3240          }
3241    
3242        /* We hide the byte-invert info for little and big endian tests. */
3243        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3244          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3245    
3246        /* Now see if there is any following study data. */
3247    
3248        if (true_study_size != 0)
3249          {
3250          pcre_study_data *psd;
3251    
3252          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3253          extra->flags = PCRE_EXTRA_STUDY_DATA;
3254    
3255          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3256          extra->study_data = psd;
3257    
3258          if (fread(psd, 1, true_study_size, f) != true_study_size)
3259            {
3260            FAIL_READ:
3261            fprintf(outfile, "Failed to read data from %s\n", p);
3262            if (extra != NULL)
3263              {
3264              PCRE_FREE_STUDY(extra);
3265              }
3266            new_free(re);
3267            fclose(f);
3268            continue;
3269            }
3270          fprintf(outfile, "Study data loaded from %s\n", p);
3271          do_study = 1;     /* To get the data output if requested */
3272          }
3273        else fprintf(outfile, "No study data\n");
3274    
3275        /* Flip the necessary bytes. */
3276        if (do_flip)
3277          {
3278          int rc;
3279          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3280          if (rc == PCRE_ERROR_BADMODE)
3281            {
3282            /* Simulate the result of the function call below. */
3283            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3284              pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3285              PCRE_INFO_OPTIONS);
3286            fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3287              "%d-bit mode\n", 8 * CHAR_SIZE,
3288              8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3289            new_free(re);
3290            fclose(f);
3291            continue;
3292            }
3293          }
3294    
3295        /* Need to know if UTF-8 for printing data strings. */
3296    
3297        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3298          {
3299          new_free(re);
3300          fclose(f);
3301          continue;
3302          }
3303        use_utf = (get_options & PCRE_UTF8) != 0;
3304    
3305        fclose(f);
3306        goto SHOW_INFO;
3307        }
3308    
3309      /* In-line pattern (the usual case). Get the delimiter and seek the end of
3310      the pattern; if it isn't complete, read more. */
3311    
3312      delimiter = *p++;
3313    
3314      if (isalnum(delimiter) || delimiter == '\\')
3315        {
3316        fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3317        goto SKIP_DATA;
3318        }
3319    
3320      pp = p;
3321      poffset = (int)(p - buffer);
3322    
3323      for(;;)
3324        {
3325        while (*pp != 0)
3326          {
3327          if (*pp == '\\' && pp[1] != 0) pp++;
3328            else if (*pp == delimiter) break;
3329          pp++;
3330          }
3331        if (*pp != 0) break;
3332        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
3333          {
3334          fprintf(outfile, "** Unexpected EOF\n");
3335          done = 1;
3336          goto CONTINUE;
3337          }
3338        if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3339        }
3340    
3341      /* The buffer may have moved while being extended; reset the start of data
3342      pointer to the correct relative point in the buffer. */
3343    
3344      p = buffer + poffset;
3345    
3346      /* If the first character after the delimiter is backslash, make
3347      the pattern end with backslash. This is purely to provide a way
3348      of testing for the error message when a pattern ends with backslash. */
3349    
3350      if (pp[1] == '\\') *pp++ = '\\';
3351    
3352      /* Terminate the pattern at the delimiter, and save a copy of the pattern
3353      for callouts. */
3354    
3355      *pp++ = 0;
3356      strcpy((char *)pbuffer, (char *)p);
3357    
3358      /* Look for options after final delimiter */
3359    
3360      options = 0;
3361      study_options = force_study_options;
3362      log_store = showstore;  /* default from command line */
3363    
3364      while (*pp != 0)
3365        {
3366        switch (*pp++)
3367          {
3368          case 'f': options |= PCRE_FIRSTLINE; break;
3369          case 'g': do_g = 1; break;
3370          case 'i': options |= PCRE_CASELESS; break;
3371          case 'm': options |= PCRE_MULTILINE; break;
3372          case 's': options |= PCRE_DOTALL; break;
3373          case 'x': options |= PCRE_EXTENDED; break;
<