/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcretest.c revision 27 by nigel, Sat Feb 24 21:38:49 2007 UTC code/branches/pcre16/pcretest.c revision 811 by zherczeg, Mon Dec 19 14:05:44 2011 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <stdio.h>  #include <stdio.h>
46  #include <string.h>  #include <string.h>
47  #include <stdlib.h>  #include <stdlib.h>
48  #include <time.h>  #include <time.h>
49  #include <locale.h>  #include <locale.h>
50    #include <errno.h>
51    
52    #ifdef SUPPORT_LIBREADLINE
53    #ifdef HAVE_UNISTD_H
54    #include <unistd.h>
55    #endif
56    #include <readline/readline.h>
57    #include <readline/history.h>
58    #endif
59    
60    
61    /* A number of things vary for Windows builds. Originally, pcretest opened its
62    input and output without "b"; then I was told that "b" was needed in some
63    environments, so it was added for release 5.0 to both the input and output. (It
64    makes no difference on Unix-like systems.) Later I was told that it is wrong
65    for the input on Windows. I've now abstracted the modes into two macros that
66    are set here, to make it easier to fiddle with them, and removed "b" from the
67    input mode under Windows. */
68    
69    #if defined(_WIN32) || defined(WIN32)
70    #include <io.h>                /* For _setmode() */
71    #include <fcntl.h>             /* For _O_BINARY */
72    #define INPUT_MODE   "r"
73    #define OUTPUT_MODE  "wb"
74    
75    #ifndef isatty
76    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
77    #endif                         /* though in some environments they seem to   */
78                                   /* be already defined, hence the #ifndefs.    */
79    #ifndef fileno
80    #define fileno _fileno
81    #endif
82    
83    /* A user sent this fix for Borland Builder 5 under Windows. */
84    
85    #ifdef __BORLANDC__
86    #define _setmode(handle, mode) setmode(handle, mode)
87    #endif
88    
89    /* Not Windows */
90    
91    #else
92    #include <sys/time.h>          /* These two includes are needed */
93    #include <sys/resource.h>      /* for setrlimit(). */
94    #define INPUT_MODE   "rb"
95    #define OUTPUT_MODE  "wb"
96    #endif
97    
98    
99    /* We have to include pcre_internal.h because we need the internal info for
100    displaying the results of pcre_study() and we also need to know about the
101    internal macros, structures, and other internal data values; pcretest has
102    "inside information" compared to a program that strictly follows the PCRE API.
103    
104    Although pcre_internal.h does itself include pcre.h, we explicitly include it
105    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106    appropriately for an application, not for building PCRE. */
107    
108    #include "pcre.h"
109    #include "pcre_internal.h"
110    
111    /* The pcre_printint() function, which prints the internal form of a compiled
112    regex, is held in a separate file so that (a) it can be compiled in either
113    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114    when that is compiled in debug mode. */
115    
116    #ifdef SUPPORT_PCRE8
117    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118    #endif
119    #ifdef SUPPORT_PCRE16
120    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121    #endif
122    
123    /* We need access to some of the data tables that PCRE uses. So as not to have
124    to keep two copies, we include the source file here, changing the names of the
125    external symbols to prevent clashes. */
126    
127    #define _pcre_ucp_gentype      ucp_gentype
128    #define _pcre_ucp_typerange    ucp_typerange
129    #define _pcre_utf8_table1      utf8_table1
130    #define _pcre_utf8_table1_size utf8_table1_size
131    #define _pcre_utf8_table2      utf8_table2
132    #define _pcre_utf8_table3      utf8_table3
133    #define _pcre_utf8_table4      utf8_table4
134    #define _pcre_utt              utt
135    #define _pcre_utt_size         utt_size
136    #define _pcre_utt_names        utt_names
137    #define _pcre_OP_lengths       OP_lengths
138    
139    #include "pcre_tables.c"
140    
141    /* The definition of the macro PRINTABLE, which determines whether to print an
142    output character as-is or as a hex value when showing compiled patterns, is
143    the same as in the printint.src file. We use it here in cases when the locale
144    has not been explicitly changed, so as to get consistent output from systems
145    that differ in their output from isprint() even in the "C" locale. */
146    
147    #ifdef EBCDIC
148    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149    #else
150    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151    #endif
152    
153  /* Use the internal info for displaying the results of pcre_study(). */  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154    
155  #include "internal.h"  /* It is possible to compile this test program without including support for
156    testing the POSIX interface, though this is not available via the standard
157    Makefile. */
158    
159    #if !defined NOPOSIX
160  #include "pcreposix.h"  #include "pcreposix.h"
161    #endif
162    
163    /* It is also possible, originally for the benefit of a version that was
164    imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165    without the interface to the DFA matcher (NODFA), and without the doublecheck
166    of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167    out the UTF8 support if PCRE is built without it. */
168    
169    #ifndef SUPPORT_UTF8
170    #ifndef NOUTF8
171    #define NOUTF8
172    #endif
173    #endif
174    
175    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177    only from one place and is handled differently). I couldn't dream up any way of
178    using a single macro to do this in a generic way, because of the many different
179    argument requirements. We know that at least one of SUPPORT_PCRE8 and
180    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181    use these in the definitions of generic macros. */
182    
183    #ifdef SUPPORT_PCRE8
184    #define PCHARS8(lv, p, len, f) \
185      lv = pchars((pcre_uint8 *)p, len, f)
186    
187    #define PCHARSV8(p, len, f) \
188      (void)pchars((pcre_uint8 *)p, len, f)
189    
190    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
191      re = pcre_compile((char *)pat, options, error, erroffset, tables)
192    
193    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
194        offsets, size_offsets) \
195      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
196        offsets, size_offsets)
197    
198    #define PCRE_STUDY8(extra, re, options, error) \
199      extra = pcre_study(re, options, error)
200    
201    #define PCRE_FREE_STUDY8(extra) \
202      pcre_free_study(extra)
203    
204    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
205      pcre_pattern_to_host_byte_order(re, extra, tables)
206    
207    #endif /* SUPPORT_PCRE8 */
208    
209    
210    #ifdef SUPPORT_PCRE16
211    #define PCHARS16(lv, p, len, f) \
212      lv = pchars16((PCRE_SPTR16)p, len, f)
213    
214    #define PCHARSV16(p, len, f) \
215      (void)pchars16((PCRE_SPTR16)p, len, f)
216    
217    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
218      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
219    
220    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
221        offsets, size_offsets) \
222      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
223        options, offsets, size_offsets)
224    
225    #define PCRE_FREE_STUDY16(extra) \
226      pcre16_free_study(extra)
227    
228    #define PCRE_STUDY16(extra, re, options, error) \
229      extra = pcre16_study(re, options, error)
230    
231    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
232      pcre16_pattern_to_host_byte_order(re, extra, tables)
233    
234    #endif /* SUPPORT_PCRE16 */
235    
236    
237    /* ----- Both modes are supported; a runtime test is needed ----- */
238    
239    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
240    
241    #define PCHARS(lv, p, len, f) \
242      if (use_pcre16) \
243        PCHARS16(lv, p, len, f); \
244      else \
245        PCHARS8(lv, p, len, f)
246    
247    #define PCHARSV(p, len, f) \
248      if (use_pcre16) \
249        PCHARSV16(p, len, f); \
250      else \
251        PCHARSV8(p, len, f)
252    
253    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
254      if (use_pcre16) \
255        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
256      else \
257        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
258    
259    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
260        offsets, size_offsets) \
261      if (use_pcre16) \
262        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
263          offsets, size_offsets); \
264      else \
265        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
266          offsets, size_offsets)
267    
268    #define PCRE_FREE_STUDY(extra) \
269      if (use_pcre16) \
270        PCRE_FREE_STUDY16(extra); \
271      else \
272        PCRE_FREE_STUDY8(extra)
273    
274    #define PCRE_STUDY(extra, re, options, error) \
275      if (use_pcre16) \
276        PCRE_STUDY16(extra, re, options, error); \
277      else \
278        PCRE_STUDY8(extra, re, options, error)
279    
280    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
281      if (use_pcre16) \
282        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
283      else \
284        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
285    
286    /* ----- Only 8-bit mode is supported ----- */
287    
288    #elif defined SUPPORT_PCRE8
289    #define PCHARS           PCHARS8
290    #define PCHARSV          PCHARSV8
291    #define PCRE_COMPILE     PCRE_COMPILE8
292    #define PCRE_EXEC        PCRE_EXEC8
293    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY8
294    #define PCRE_STUDY       PCRE_STUDY8
295    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
296    
297    /* ----- Only 16-bit mode is supported ----- */
298    
299    #else
300    #define PCHARS           PCHARS16
301    #define PCHARSV          PCHARSV16
302    #define PCRE_COMPILE     PCRE_COMPILE16
303    #define PCRE_EXEC        PCRE_EXEC16
304    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY16
305    #define PCRE_STUDY       PCRE_STUDY16
306    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
307    #endif
308    
309    /* ----- End of mode-specific function call macros ----- */
310    
311    
312    /* Other parameters */
313    
314  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
315  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 319 
319  #endif  #endif
320  #endif  #endif
321    
322  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
323    
324    #define LOOPREPEAT 500000
325    
326    /* Static variables */
327    
328  static FILE *outfile;  static FILE *outfile;
329  static int log_store = 0;  static int log_store = 0;
330    static int callout_count;
331    static int callout_extra;
332    static int callout_fail_count;
333    static int callout_fail_id;
334    static int debug_lengths;
335    static int first_callout;
336    static int locale_set = 0;
337    static int show_malloc;
338    static int use_utf;
339    static size_t gotten_store;
340    static size_t first_gotten_store = 0;
341    static const unsigned char *last_callout_mark = NULL;
342    
343    /* The buffers grow automatically if very long input lines are encountered. */
344    
345    static int buffer_size = 50000;
346    static pcre_uint8 *buffer = NULL;
347    static pcre_uint8 *dbuffer = NULL;
348    static pcre_uint8 *pbuffer = NULL;
349    
350    #ifdef SUPPORT_PCRE16
351    static int buffer16_size = 0;
352    static pcre_uint16 *buffer16 = NULL;
353    #endif
354    
355    /* If we have 8-bit support, default use_pcre16 to false; if there is also
356    16-bit support, it can be changed by an option. If there is no 8-bit support,
357    there must be 16-bit support, so default it to 1. */
358    
359    #ifdef SUPPORT_PCRE8
360    static int use_pcre16 = 0;
361    #else
362    static int use_pcre16 = 1;
363    #endif
364    
365  /* Debugging function to print the internal form of the regex. This is the same  /* Textual explanations for runtime error codes */
 code as contained in pcre.c under the DEBUG macro. */  
366    
367  static const char *OP_names[] = {  static const char *errtexts[] = {
368    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    NULL,  /* 0 is no error */
369    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",    NULL,  /* NOMATCH is handled specially */
370    "Opt", "^", "$", "Any", "chars", "not",    "NULL argument passed",
371    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "bad option value",
372    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "magic number missing",
373    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "unknown opcode - pattern overwritten?",
374    "*", "*?", "+", "+?", "?", "??", "{", "{",    "no more memory",
375    "class", "Ref",    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
376    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "match limit exceeded",
377    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "callout error code",
378    "Brazero", "Braminzero", "Bra"    NULL,  /* BADUTF8 is handled specially */
379      "bad UTF-8 offset",
380      NULL,  /* PARTIAL is handled specially */
381      "not used - internal error",
382      "internal error - pattern overwritten?",
383      "bad count value",
384      "item unsupported for DFA matching",
385      "backreference condition or recursion test not supported for DFA matching",
386      "match limit not supported for DFA matching",
387      "workspace size exceeded in DFA matching",
388      "too much recursion for DFA matching",
389      "recursion limit exceeded",
390      "not used - internal error",
391      "invalid combination of newline options",
392      "bad offset value",
393      NULL,  /* SHORTUTF8 is handled specially */
394      "nested recursion at the same subject position",
395      "JIT stack limit reached",
396      "pattern compiled in wrong mode (8-bit/16-bit error)"
397  };  };
398    
399    
400  static void print_internals(pcre *re, FILE *outfile)  /*************************************************
401  {  *         Alternate character tables             *
402  unsigned char *code = ((real_pcre *)re)->code;  *************************************************/
403    
404  fprintf(outfile, "------------------------------------------------------------------\n");  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
405    using the default tables of the library. However, the T option can be used to
406    select alternate sets of tables, for different kinds of testing. Note also that
407    the L (locale) option also adjusts the tables. */
408    
409    /* This is the set of tables distributed as default with PCRE. It recognizes
410    only ASCII characters. */
411    
412    static const pcre_uint8 tables0[] = {
413    
414    /* This table is a lower casing table. */
415    
416        0,  1,  2,  3,  4,  5,  6,  7,
417        8,  9, 10, 11, 12, 13, 14, 15,
418       16, 17, 18, 19, 20, 21, 22, 23,
419       24, 25, 26, 27, 28, 29, 30, 31,
420       32, 33, 34, 35, 36, 37, 38, 39,
421       40, 41, 42, 43, 44, 45, 46, 47,
422       48, 49, 50, 51, 52, 53, 54, 55,
423       56, 57, 58, 59, 60, 61, 62, 63,
424       64, 97, 98, 99,100,101,102,103,
425      104,105,106,107,108,109,110,111,
426      112,113,114,115,116,117,118,119,
427      120,121,122, 91, 92, 93, 94, 95,
428       96, 97, 98, 99,100,101,102,103,
429      104,105,106,107,108,109,110,111,
430      112,113,114,115,116,117,118,119,
431      120,121,122,123,124,125,126,127,
432      128,129,130,131,132,133,134,135,
433      136,137,138,139,140,141,142,143,
434      144,145,146,147,148,149,150,151,
435      152,153,154,155,156,157,158,159,
436      160,161,162,163,164,165,166,167,
437      168,169,170,171,172,173,174,175,
438      176,177,178,179,180,181,182,183,
439      184,185,186,187,188,189,190,191,
440      192,193,194,195,196,197,198,199,
441      200,201,202,203,204,205,206,207,
442      208,209,210,211,212,213,214,215,
443      216,217,218,219,220,221,222,223,
444      224,225,226,227,228,229,230,231,
445      232,233,234,235,236,237,238,239,
446      240,241,242,243,244,245,246,247,
447      248,249,250,251,252,253,254,255,
448    
449    /* This table is a case flipping table. */
450    
451        0,  1,  2,  3,  4,  5,  6,  7,
452        8,  9, 10, 11, 12, 13, 14, 15,
453       16, 17, 18, 19, 20, 21, 22, 23,
454       24, 25, 26, 27, 28, 29, 30, 31,
455       32, 33, 34, 35, 36, 37, 38, 39,
456       40, 41, 42, 43, 44, 45, 46, 47,
457       48, 49, 50, 51, 52, 53, 54, 55,
458       56, 57, 58, 59, 60, 61, 62, 63,
459       64, 97, 98, 99,100,101,102,103,
460      104,105,106,107,108,109,110,111,
461      112,113,114,115,116,117,118,119,
462      120,121,122, 91, 92, 93, 94, 95,
463       96, 65, 66, 67, 68, 69, 70, 71,
464       72, 73, 74, 75, 76, 77, 78, 79,
465       80, 81, 82, 83, 84, 85, 86, 87,
466       88, 89, 90,123,124,125,126,127,
467      128,129,130,131,132,133,134,135,
468      136,137,138,139,140,141,142,143,
469      144,145,146,147,148,149,150,151,
470      152,153,154,155,156,157,158,159,
471      160,161,162,163,164,165,166,167,
472      168,169,170,171,172,173,174,175,
473      176,177,178,179,180,181,182,183,
474      184,185,186,187,188,189,190,191,
475      192,193,194,195,196,197,198,199,
476      200,201,202,203,204,205,206,207,
477      208,209,210,211,212,213,214,215,
478      216,217,218,219,220,221,222,223,
479      224,225,226,227,228,229,230,231,
480      232,233,234,235,236,237,238,239,
481      240,241,242,243,244,245,246,247,
482      248,249,250,251,252,253,254,255,
483    
484    /* This table contains bit maps for various character classes. Each map is 32
485    bytes long and the bits run from the least significant end of each byte. The
486    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
487    graph, print, punct, and cntrl. Other classes are built from combinations. */
488    
489      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
490      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
491      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
492      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
493    
494      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
495      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
496      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
497      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498    
499      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
500      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
501      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
502      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
503    
504      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
505      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
506      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
507      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508    
509      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
510      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
511      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
512      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
513    
514      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
515      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
516      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
517      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518    
519      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
520      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
521      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
522      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523    
524      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
525      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
526      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
527      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
528    
529      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
530      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
531      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
532      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
533    
534      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
535      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
536      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
537      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
538    
539    /* This table identifies various classes of character by individual bits:
540      0x01   white space character
541      0x02   letter
542      0x04   decimal digit
543      0x08   hexadecimal digit
544      0x10   alphanumeric or '_'
545      0x80   regular expression metacharacter or binary zero
546    */
547    
548      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
549      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
550      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
551      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
552      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
553      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
554      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
555      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
556      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
557      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
558      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
559      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
560      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
561      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
562      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
563      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
564      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
565      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
566      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
567      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
568      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
569      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
570      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
571      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
572      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
573      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
574      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
575      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
576      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
577      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
578      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
579      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
580    
581    /* This is a set of tables that came originally from a Windows user. It seems to
582    be at least an approximation of ISO 8859. In particular, there are characters
583    greater than 128 that are marked as spaces, letters, etc. */
584    
585    static const pcre_uint8 tables1[] = {
586    0,1,2,3,4,5,6,7,
587    8,9,10,11,12,13,14,15,
588    16,17,18,19,20,21,22,23,
589    24,25,26,27,28,29,30,31,
590    32,33,34,35,36,37,38,39,
591    40,41,42,43,44,45,46,47,
592    48,49,50,51,52,53,54,55,
593    56,57,58,59,60,61,62,63,
594    64,97,98,99,100,101,102,103,
595    104,105,106,107,108,109,110,111,
596    112,113,114,115,116,117,118,119,
597    120,121,122,91,92,93,94,95,
598    96,97,98,99,100,101,102,103,
599    104,105,106,107,108,109,110,111,
600    112,113,114,115,116,117,118,119,
601    120,121,122,123,124,125,126,127,
602    128,129,130,131,132,133,134,135,
603    136,137,138,139,140,141,142,143,
604    144,145,146,147,148,149,150,151,
605    152,153,154,155,156,157,158,159,
606    160,161,162,163,164,165,166,167,
607    168,169,170,171,172,173,174,175,
608    176,177,178,179,180,181,182,183,
609    184,185,186,187,188,189,190,191,
610    224,225,226,227,228,229,230,231,
611    232,233,234,235,236,237,238,239,
612    240,241,242,243,244,245,246,215,
613    248,249,250,251,252,253,254,223,
614    224,225,226,227,228,229,230,231,
615    232,233,234,235,236,237,238,239,
616    240,241,242,243,244,245,246,247,
617    248,249,250,251,252,253,254,255,
618    0,1,2,3,4,5,6,7,
619    8,9,10,11,12,13,14,15,
620    16,17,18,19,20,21,22,23,
621    24,25,26,27,28,29,30,31,
622    32,33,34,35,36,37,38,39,
623    40,41,42,43,44,45,46,47,
624    48,49,50,51,52,53,54,55,
625    56,57,58,59,60,61,62,63,
626    64,97,98,99,100,101,102,103,
627    104,105,106,107,108,109,110,111,
628    112,113,114,115,116,117,118,119,
629    120,121,122,91,92,93,94,95,
630    96,65,66,67,68,69,70,71,
631    72,73,74,75,76,77,78,79,
632    80,81,82,83,84,85,86,87,
633    88,89,90,123,124,125,126,127,
634    128,129,130,131,132,133,134,135,
635    136,137,138,139,140,141,142,143,
636    144,145,146,147,148,149,150,151,
637    152,153,154,155,156,157,158,159,
638    160,161,162,163,164,165,166,167,
639    168,169,170,171,172,173,174,175,
640    176,177,178,179,180,181,182,183,
641    184,185,186,187,188,189,190,191,
642    224,225,226,227,228,229,230,231,
643    232,233,234,235,236,237,238,239,
644    240,241,242,243,244,245,246,215,
645    248,249,250,251,252,253,254,223,
646    192,193,194,195,196,197,198,199,
647    200,201,202,203,204,205,206,207,
648    208,209,210,211,212,213,214,247,
649    216,217,218,219,220,221,222,255,
650    0,62,0,0,1,0,0,0,
651    0,0,0,0,0,0,0,0,
652    32,0,0,0,1,0,0,0,
653    0,0,0,0,0,0,0,0,
654    0,0,0,0,0,0,255,3,
655    126,0,0,0,126,0,0,0,
656    0,0,0,0,0,0,0,0,
657    0,0,0,0,0,0,0,0,
658    0,0,0,0,0,0,255,3,
659    0,0,0,0,0,0,0,0,
660    0,0,0,0,0,0,12,2,
661    0,0,0,0,0,0,0,0,
662    0,0,0,0,0,0,0,0,
663    254,255,255,7,0,0,0,0,
664    0,0,0,0,0,0,0,0,
665    255,255,127,127,0,0,0,0,
666    0,0,0,0,0,0,0,0,
667    0,0,0,0,254,255,255,7,
668    0,0,0,0,0,4,32,4,
669    0,0,0,128,255,255,127,255,
670    0,0,0,0,0,0,255,3,
671    254,255,255,135,254,255,255,7,
672    0,0,0,0,0,4,44,6,
673    255,255,127,255,255,255,127,255,
674    0,0,0,0,254,255,255,255,
675    255,255,255,255,255,255,255,127,
676    0,0,0,0,254,255,255,255,
677    255,255,255,255,255,255,255,255,
678    0,2,0,0,255,255,255,255,
679    255,255,255,255,255,255,255,127,
680    0,0,0,0,255,255,255,255,
681    255,255,255,255,255,255,255,255,
682    0,0,0,0,254,255,0,252,
683    1,0,0,248,1,0,0,120,
684    0,0,0,0,254,255,255,255,
685    0,0,128,0,0,0,128,0,
686    255,255,255,255,0,0,0,0,
687    0,0,0,0,0,0,0,128,
688    255,255,255,255,0,0,0,0,
689    0,0,0,0,0,0,0,0,
690    128,0,0,0,0,0,0,0,
691    0,1,1,0,1,1,0,0,
692    0,0,0,0,0,0,0,0,
693    0,0,0,0,0,0,0,0,
694    1,0,0,0,128,0,0,0,
695    128,128,128,128,0,0,128,0,
696    28,28,28,28,28,28,28,28,
697    28,28,0,0,0,0,0,128,
698    0,26,26,26,26,26,26,18,
699    18,18,18,18,18,18,18,18,
700    18,18,18,18,18,18,18,18,
701    18,18,18,128,128,0,128,16,
702    0,26,26,26,26,26,26,18,
703    18,18,18,18,18,18,18,18,
704    18,18,18,18,18,18,18,18,
705    18,18,18,128,128,0,0,0,
706    0,0,0,0,0,1,0,0,
707    0,0,0,0,0,0,0,0,
708    0,0,0,0,0,0,0,0,
709    0,0,0,0,0,0,0,0,
710    1,0,0,0,0,0,0,0,
711    0,0,18,0,0,0,0,0,
712    0,0,20,20,0,18,0,0,
713    0,20,18,0,0,0,0,0,
714    18,18,18,18,18,18,18,18,
715    18,18,18,18,18,18,18,18,
716    18,18,18,18,18,18,18,0,
717    18,18,18,18,18,18,18,18,
718    18,18,18,18,18,18,18,18,
719    18,18,18,18,18,18,18,18,
720    18,18,18,18,18,18,18,0,
721    18,18,18,18,18,18,18,18
722    };
723    
 for(;;)  
   {  
   int c;  
   int charlength;  
724    
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
725    
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
726    
727    else switch(*code)  #ifndef HAVE_STRERROR
728      {  /*************************************************
729      case OP_END:  *     Provide strerror() for non-ANSI libraries  *
730      fprintf(outfile, "    %s\n", OP_names[*code]);  *************************************************/
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
731    
732          case OP_CRRANGE:  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
733          case OP_CRMINRANGE:  in their libraries, but can provide the same facility by this simple
734          min = (code[1] << 8) + code[2];  alternative function. */
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
735    
736          default:  extern int   sys_nerr;
737          code--;  extern char *sys_errlist[];
         }  
       }  
     break;  
738    
739      /* Anything else is just a one-node item */  char *
740    strerror(int n)
741    {
742    if (n < 0 || n >= sys_nerr) return "unknown error number";
743    return sys_errlist[n];
744    }
745    #endif /* HAVE_STRERROR */
746    
     default:  
     fprintf(outfile, "    %s", OP_names[*code]);  
     break;  
     }  
747    
748    code++;  /*************************************************
749    fprintf(outfile, "\n");  *         JIT memory callback                    *
750    }  *************************************************/
751    
752    static pcre_jit_stack* jit_callback(void *arg)
753    {
754    return (pcre_jit_stack *)arg;
755  }  }
756    
757    
758    /*************************************************
759    *            Convert UTF-8 string to value       *
760    *************************************************/
761    
762    /* This function takes one or more bytes that represents a UTF-8 character,
763    and returns the value of the character.
764    
765    Argument:
766      utf8bytes   a pointer to the byte vector
767      vptr        a pointer to an int to receive the value
768    
769  /* Character string printing function. */  Returns:      >  0 => the number of bytes consumed
770                  -6 to 0 => malformed UTF-8 character at offset = (-return)
771    */
772    
773  static void pchars(unsigned char *p, int length)  #if !defined NOUTF8
774    
775    static int
776    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
777  {  {
778  int c;  int c = *utf8bytes++;
779  while (length-- > 0)  int d = c;
780    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  int i, j, s;
     else fprintf(outfile, "\\x%02x", c);  
 }  
781    
782    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
783      {
784      if ((d & 0x80) == 0) break;
785      d <<= 1;
786      }
787    
788    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
789    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
790    
791  /* Alternative malloc function, to test functionality and show the size of the  /* i now has a value in the range 1-5 */
 compiled re. */  
792    
793  static void *new_malloc(size_t size)  s = 6*i;
794  {  d = (c & utf8_table3[i]) << s;
795  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  
796  return malloc(size);  for (j = 0; j < i; j++)
797      {
798      c = *utf8bytes++;
799      if ((c & 0xc0) != 0x80) return -(j+1);
800      s -= 6;
801      d |= (c & 0x3f) << s;
802      }
803    
804    /* Check that encoding was the correct unique one */
805    
806    for (j = 0; j < utf8_table1_size; j++)
807      if (d <= utf8_table1[j]) break;
808    if (j != i) return -(i+1);
809    
810    /* Valid value */
811    
812    *vptr = d;
813    return i+1;
814  }  }
815    
816    #endif
817    
818    
 /* Read lines from named file or stdin and write to named file or stdout; lines  
 consist of a regular expression, in delimiters and optionally followed by  
 options, followed by a set of test data, terminated by an empty line. */  
819    
820  int main(int argc, char **argv)  /*************************************************
821    *       Convert character value to UTF-8         *
822    *************************************************/
823    
824    /* This function takes an integer value in the range 0 - 0x7fffffff
825    and encodes it as a UTF-8 character in 1 to 6 bytes.
826    
827    Arguments:
828      cvalue     the character value
829      utf8bytes  pointer to buffer for result - at least 6 bytes long
830    
831    Returns:     number of bytes placed in the buffer
832    */
833    
834    #if !defined NOUTF8
835    
836    static int
837    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
838  {  {
839  FILE *infile = stdin;  register int i, j;
840  int options = 0;  for (i = 0; i < utf8_table1_size; i++)
841  int study_options = 0;    if (cvalue <= utf8_table1[i]) break;
842  int op = 1;  utf8bytes += i;
843  int timeit = 0;  for (j = i; j > 0; j--)
844  int showinfo = 0;   {
845  int posix = 0;   *utf8bytes-- = 0x80 | (cvalue & 0x3f);
846  int debug = 0;   cvalue >>= 6;
847  int done = 0;   }
848  unsigned char buffer[30000];  *utf8bytes = utf8_table2[i] | cvalue;
849  unsigned char dbuffer[1024];  return i + 1;
850    }
851    
852  /* Static so that new_malloc can use it. */  #endif
853    
 outfile = stdout;  
854    
 /* Scan options */  
855    
856  while (argc > 1 && argv[op][0] == '-')  #ifdef SUPPORT_PCRE16
857    /*************************************************
858    *         Convert a string to 16-bit             *
859    *************************************************/
860    
861    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
862    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
863    double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
864    in UTF-16. Higher values use 4 bytes in UTF-8 and exactly 4 bytes in UTF-16. The
865    result is always left in buffer16.
866    
867    Arguments:
868      p          points to a byte string
869      utf        true if UTF-8 (to be converted to UTF-16)
870      len        number of bytes in the string (excluding trailing zero)
871    
872    Returns:     number of 16-bit data items used (excluding trailing zero)
873                 OR -1 if a UTF-8 string is malformed
874    */
875    
876    static int
877    to16(pcre_uint8 *p, int utf, int len)
878    {
879    pcre_uint16 *pp;
880    
881    if (buffer16_size < 2*len + 2)
882    {    {
883    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (buffer16 != NULL) free(buffer16);
884    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    buffer16_size = 2*len + 2;
885    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
886    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    if (buffer16 == NULL)
   else if (strcmp(argv[op], "-p") == 0) posix = 1;  
   else  
887      {      {
888      printf("*** Unknown option %s\n", argv[op]);      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
889      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      exit(1);
     printf("  -d   debug: show compiled code; implies -i\n"  
            "  -i   show information about compiled pattern\n"  
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
890      }      }
   op++;  
   argc--;  
891    }    }
892    
893  /* Sort out the input and output files */  pp = buffer16;
894    
895  if (argc > 1)  if (!utf)
896    {    {
897    infile = fopen(argv[op], "r");    while (len-- > 0) *pp++ = *p++;
   if (infile == NULL)  
     {  
     printf("** Failed to open %s\n", argv[op]);  
     return 1;  
     }  
898    }    }
899    
900  if (argc > 2)  else
901    {    {
902    outfile = fopen(argv[op+1], "w");    int c;
903    if (outfile == NULL)    while (len > 0)
904      {      {
905      printf("** Failed to open %s\n", argv[op+1]);      int chlen = utf82ord(p, &c);
906      return 1;      if (chlen <= 0) return -1;
907        p += chlen;
908        len -= chlen;
909        if (c < 0x10000) *pp++ = c; else
910          {
911          c -= 0x10000;
912          *pp++ = 0xD800 | (c >> 10);
913          *pp++ = 0xDC00 | (c & 0x3ff);
914          }
915      }      }
916    }    }
917    
918  /* Set alternative malloc function */  *pp = 0;
919    return pp - buffer16;
920    }
921    #endif
922    
 pcre_malloc = new_malloc;  
923    
924  /* Heading line, then prompt for first regex if stdin */  /*************************************************
925    *        Read or extend an input line            *
926    *************************************************/
927    
928  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  /* Input lines are read into buffer, but both patterns and data lines can be
929    continued over multiple input lines. In addition, if the buffer fills up, we
930    want to automatically expand it so as to be able to handle extremely large
931    lines that are needed for certain stress tests. When the input buffer is
932    expanded, the other two buffers must also be expanded likewise, and the
933    contents of pbuffer, which are a copy of the input for callouts, must be
934    preserved (for when expansion happens for a data line). This is not the most
935    optimal way of handling this, but hey, this is just a test program!
936    
937    Arguments:
938      f            the file to read
939      start        where in buffer to start (this *must* be within buffer)
940      prompt       for stdin or readline()
941    
942    Returns:       pointer to the start of new data
943                   could be a copy of start, or could be moved
944                   NULL if no data read and EOF reached
945    */
946    
947  /* Main loop */  static pcre_uint8 *
948    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
949    {
950    pcre_uint8 *here = start;
951    
952  while (!done)  for (;;)
953    {    {
954    pcre *re = NULL;    int rlen = (int)(buffer_size - (here - buffer));
   pcre_extra *extra = NULL;  
   regex_t preg;  
   const char *error;  
   unsigned char *p, *pp, *ppp;  
   unsigned const char *tables = NULL;  
   int do_study = 0;  
   int do_debug = debug;  
   int do_showinfo = showinfo;  
   int do_posix = 0;  
   int erroroffset, len, delimiter;  
955    
956    if (infile == stdin) printf("  re> ");    if (rlen > 1000)
957    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;      {
958    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);      int dlen;
959    
960    p = buffer;      /* If libreadline support is required, use readline() to read a line if the
961    while (isspace(*p)) p++;      input is a terminal. Note that readline() removes the trailing newline, so
962    if (*p == 0) continue;      we must put it back again, to be compatible with fgets(). */
963    
964    /* Get the delimiter and seek the end of the pattern; if it isn't  #ifdef SUPPORT_LIBREADLINE
965    complete, read more. */      if (isatty(fileno(f)))
966          {
967          size_t len;
968          char *s = readline(prompt);
969          if (s == NULL) return (here == start)? NULL : start;
970          len = strlen(s);
971          if (len > 0) add_history(s);
972          if (len > rlen - 1) len = rlen - 1;
973          memcpy(here, s, len);
974          here[len] = '\n';
975          here[len+1] = 0;
976          free(s);
977          }
978        else
979    #endif
980    
981    delimiter = *p++;      /* Read the next line by normal means, prompting if the file is stdin. */
982    
983    if (isalnum(delimiter))        {
984      {        if (f == stdin) printf("%s", prompt);
985      fprintf(outfile, "** Delimiter must not be alphameric\n");        if (fgets((char *)here, rlen,  f) == NULL)
986      goto SKIP_DATA;          return (here == start)? NULL : start;
987      }        }
988    
989    pp = p;      dlen = (int)strlen((char *)here);
990        if (dlen > 0 && here[dlen - 1] == '\n') return start;
991        here += dlen;
992        }
993    
994    for(;;)    else
995      {      {
996      while (*pp != 0 && *pp != delimiter) pp++;      int new_buffer_size = 2*buffer_size;
997      if (*pp != 0) break;      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
998        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
999        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1000    
1001      len = sizeof(buffer) - (pp - buffer);      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
     if (len < 256)  
1002        {        {
1003        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1004        goto SKIP_DATA;        exit(1);
1005        }        }
1006    
1007      if (infile == stdin) printf("    > ");      memcpy(new_buffer, buffer, buffer_size);
1008      if (fgets((char *)pp, len, infile) == NULL)      memcpy(new_pbuffer, pbuffer, buffer_size);
1009        {  
1010        fprintf(outfile, "** Unexpected EOF\n");      buffer_size = new_buffer_size;
1011        done = 1;  
1012        goto CONTINUE;      start = new_buffer + (start - buffer);
1013        }      here = new_buffer + (here - buffer);
1014      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);  
1015        free(buffer);
1016        free(dbuffer);
1017        free(pbuffer);
1018    
1019        buffer = new_buffer;
1020        dbuffer = new_dbuffer;
1021        pbuffer = new_pbuffer;
1022      }      }
1023      }
1024    
1025    /* Terminate the pattern at the delimiter */  return NULL;  /* Control never gets here */
1026    }
1027    
   *pp++ = 0;  
1028    
   /* Look for options after final delimiter */  
1029    
1030    options = 0;  /*************************************************
1031    study_options = 0;  *          Read number from string               *
1032    while (*pp != 0)  *************************************************/
     {  
     switch (*pp++)  
       {  
       case 'i': options |= PCRE_CASELESS; break;  
       case 'm': options |= PCRE_MULTILINE; break;  
       case 's': options |= PCRE_DOTALL; break;  
       case 'x': options |= PCRE_EXTENDED; break;  
1033    
1034        case 'A': options |= PCRE_ANCHORED; break;  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1035        case 'D': do_debug = do_showinfo = 1; break;  around with conditional compilation, just do the job by hand. It is only used
1036        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;  for unpicking arguments, so just keep it simple.
1037        case 'I': do_showinfo = 1; break;  
1038        case 'P': do_posix = 1; break;  Arguments:
1039        case 'S': do_study = 1; break;    str           string to be converted
1040        case 'U': options |= PCRE_UNGREEDY; break;    endptr        where to put the end pointer
       case 'X': options |= PCRE_EXTRA; break;  
1041    
1042        case 'L':  Returns:        the unsigned long
1043        ppp = pp;  */
       while (*ppp != '\n' && *ppp != ' ') ppp++;  
       *ppp = 0;  
       if (setlocale(LC_CTYPE, (const char *)pp) == NULL)  
         {  
         fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);  
         goto SKIP_DATA;  
         }  
       tables = pcre_maketables();  
       pp = ppp;  
       break;  
1044    
1045        case '\n': case ' ': break;  static int
1046        default:  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1047        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);  {
1048        goto SKIP_DATA;  int result = 0;
1049        }  while(*str != 0 && isspace(*str)) str++;
1050      }  while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1051    *endptr = str;
1052    return(result);
1053    }
1054    
   /* Handle compiling via the POSIX interface, which doesn't support the  
   timing, showing, or debugging options, nor the ability to pass over  
   local character tables. */  
1055    
1056    if (posix || do_posix)  
1057    /*************************************************
1058    *             Print one character                *
1059    *************************************************/
1060    
1061    /* Print a single character either literally, or as a hex escape. */
1062    
1063    static int pchar(int c, FILE *f)
1064    {
1065    if (PRINTOK(c))
1066      {
1067      if (f != NULL) fprintf(f, "%c", c);
1068      return 1;
1069      }
1070    
1071    if (c < 0x100)
1072      {
1073      if (use_utf)
1074        {
1075        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1076        return 6;
1077        }
1078      else
1079      {      {
1080      int rc;      if (f != NULL) fprintf(f, "\\x%02x", c);
1081      int cflags = 0;      return 4;
1082      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      }
1083      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;    }
1084      rc = regcomp(&preg, (char *)p, cflags);  
1085    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1086    return (c <= 0x000000ff)? 6 :
1087           (c <= 0x00000fff)? 7 :
1088           (c <= 0x0000ffff)? 8 :
1089           (c <= 0x000fffff)? 9 : 10;
1090    }
1091    
     /* Compilation failed; go back for another re, skipping to blank line  
     if non-interactive. */  
1092    
     if (rc != 0)  
       {  
       (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));  
       fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);  
       goto SKIP_DATA;  
       }  
     }  
1093    
1094    /* Handle compiling via the native interface */  #ifdef SUPPORT_PCRE8
1095    /*************************************************
1096    *         Print 8-bit character string           *
1097    *************************************************/
1098    
1099    else  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1100    If handed a NULL file, just counts chars without printing. */
1101    
1102    static int pchars(pcre_uint8 *p, int length, FILE *f)
1103    {
1104    int c = 0;
1105    int yield = 0;
1106    
1107    while (length-- > 0)
1108      {
1109    #if !defined NOUTF8
1110      if (use_utf)
1111      {      {
1112      if (timeit)      int rc = utf82ord(p, &c);
1113        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1114        {        {
1115        register int i;        length -= rc - 1;
1116        clock_t time_taken;        p += rc;
1117        clock_t start_time = clock();        yield += pchar(c, f);
1118        for (i = 0; i < LOOPREPEAT; i++)        continue;
         {  
         re = pcre_compile((char *)p, options, &error, &erroroffset, tables);  
         if (re != NULL) free(re);  
         }  
       time_taken = clock() - start_time;  
       fprintf(outfile, "Compile time %.3f milliseconds\n",  
         ((double)time_taken * 1000.0) /  
         ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
1119        }        }
1120        }
1121    #endif
1122      c = *p++;
1123      yield += pchar(c, f);
1124      }
1125    
1126    return yield;
1127    }
1128    #endif
1129    
1130    
1131    
1132    #ifdef SUPPORT_PCRE16
1133    /*************************************************
1134    *           Print 16-bit character string        *
1135    *************************************************/
1136    
1137    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1138    If handed a NULL file, just counts chars without printing. */
1139    
1140    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1141    {
1142    int yield = 0;
1143    
1144    while (length-- > 0)
1145      {
1146      int c = *p++ & 0xffff;
1147    #if !defined NOUTF8
1148      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1149        {
1150        int d = *p & 0xffff;
1151        if (d >= 0xDC00 && d < 0xDFFF)
1152          {
1153          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1154          length--;
1155          p++;
1156          }
1157        }
1158    #endif
1159      yield += pchar(c, f);
1160      }
1161    
1162    return yield;
1163    }
1164    #endif
1165    
1166    
1167    
1168    /*************************************************
1169    *              Callout function                  *
1170    *************************************************/
1171    
1172    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1173    the match. Yield zero unless more callouts than the fail count, or the callout
1174    data is not zero. */
1175    
1176    static int callout(pcre_callout_block *cb)
1177    {
1178    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1179    int i, pre_start, post_start, subject_length;
1180    
1181    if (callout_extra)
1182      {
1183      fprintf(f, "Callout %d: last capture = %d\n",
1184        cb->callout_number, cb->capture_last);
1185    
1186      for (i = 0; i < cb->capture_top * 2; i += 2)
1187        {
1188        if (cb->offset_vector[i] < 0)
1189          fprintf(f, "%2d: <unset>\n", i/2);
1190        else
1191          {
1192          fprintf(f, "%2d: ", i/2);
1193          PCHARSV(cb->subject + cb->offset_vector[i],
1194            cb->offset_vector[i+1] - cb->offset_vector[i], f);
1195          fprintf(f, "\n");
1196          }
1197        }
1198      }
1199    
1200    /* Re-print the subject in canonical form, the first time or if giving full
1201    datails. On subsequent calls in the same match, we use pchars just to find the
1202    printed lengths of the substrings. */
1203    
1204    if (f != NULL) fprintf(f, "--->");
1205    
1206    PCHARS(pre_start, cb->subject, cb->start_match, f);
1207    PCHARS(post_start, cb->subject + cb->start_match,
1208      cb->current_position - cb->start_match, f);
1209    
1210    PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1211    
1212    PCHARSV(cb->subject + cb->current_position,
1213      cb->subject_length - cb->current_position, f);
1214    
1215    if (f != NULL) fprintf(f, "\n");
1216    
1217    /* Always print appropriate indicators, with callout number if not already
1218    shown. For automatic callouts, show the pattern offset. */
1219    
1220    if (cb->callout_number == 255)
1221      {
1222      fprintf(outfile, "%+3d ", cb->pattern_position);
1223      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1224      }
1225    else
1226      {
1227      if (callout_extra) fprintf(outfile, "    ");
1228        else fprintf(outfile, "%3d ", cb->callout_number);
1229      }
1230    
1231    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1232    fprintf(outfile, "^");
1233    
1234    if (post_start > 0)
1235      {
1236      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1237      fprintf(outfile, "^");
1238      }
1239    
1240    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1241      fprintf(outfile, " ");
1242    
1243    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1244      pbuffer + cb->pattern_position);
1245    
1246    fprintf(outfile, "\n");
1247    first_callout = 0;
1248    
1249    if (cb->mark != last_callout_mark)
1250      {
1251      fprintf(outfile, "Latest Mark: %s\n",
1252        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1253      last_callout_mark = cb->mark;
1254      }
1255    
1256    if (cb->callout_data != NULL)
1257      {
1258      int callout_data = *((int *)(cb->callout_data));
1259      if (callout_data != 0)
1260        {
1261        fprintf(outfile, "Callout data = %d\n", callout_data);
1262        return callout_data;
1263        }
1264      }
1265    
1266    return (cb->callout_number != callout_fail_id)? 0 :
1267           (++callout_count >= callout_fail_count)? 1 : 0;
1268    }
1269    
1270    
1271    /*************************************************
1272    *            Local malloc functions              *
1273    *************************************************/
1274    
1275    /* Alternative malloc function, to test functionality and save the size of a
1276    compiled re, which is the first store request that pcre_compile() makes. The
1277    show_malloc variable is set only during matching. */
1278    
1279    static void *new_malloc(size_t size)
1280    {
1281    void *block = malloc(size);
1282    gotten_store = size;
1283    if (first_gotten_store == 0) first_gotten_store = size;
1284    if (show_malloc)
1285      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1286    return block;
1287    }
1288    
1289    static void new_free(void *block)
1290    {
1291    if (show_malloc)
1292      fprintf(outfile, "free             %p\n", block);
1293    free(block);
1294    }
1295    
1296    /* For recursion malloc/free, to test stacking calls */
1297    
1298    static void *stack_malloc(size_t size)
1299    {
1300    void *block = malloc(size);
1301    if (show_malloc)
1302      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1303    return block;
1304    }
1305    
1306    static void stack_free(void *block)
1307    {
1308    if (show_malloc)
1309      fprintf(outfile, "stack_free       %p\n", block);
1310    free(block);
1311    }
1312    
1313    
1314    /*************************************************
1315    *          Call pcre_fullinfo()                  *
1316    *************************************************/
1317    
1318    /* Get one piece of information from the pcre_fullinfo() function. When only
1319    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1320    value, but the code is defensive. */
1321    
1322    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1323    {
1324    int rc;
1325    
1326    if (use_pcre16)
1327    #ifdef SUPPORT_PCRE16
1328      rc = pcre16_fullinfo(re, study, option, ptr);
1329    #else
1330      rc = PCRE_ERROR_BADMODE;
1331    #endif
1332    else
1333    #ifdef SUPPORT_PCRE8
1334      rc = pcre_fullinfo(re, study, option, ptr);
1335    #else
1336      rc = PCRE_ERROR_BADMODE;
1337    #endif
1338    
1339    if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1340      use_pcre16? "16" : "", option);
1341    }
1342    
1343    
1344    
1345    /*************************************************
1346    *         Byte flipping function                 *
1347    *************************************************/
1348    
/* Reverse the order of the low-order bytes of a value.

Arguments:
  value      the value to flip; only its low 2 bytes (n == 2) or low 4 bytes
             (any other n) contribute to the result
  n          2 to swap two bytes; any other value reverses four bytes

Returns:     the byte-reversed value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1358    
1359    
1360    
1361    
1362    /*************************************************
1363    *        Check match or recursion limit          *
1364    *************************************************/
1365    
1366    static int
1367    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1368      int start_offset, int options, int *use_offsets, int use_size_offsets,
1369      int flag, unsigned long int *limit, int errnumber, const char *msg)
1370    {
1371    int count;
1372    int min = 0;
1373    int mid = 64;
1374    int max = -1;
1375    
1376    extra->flags |= flag;
1377    
1378    for (;;)
1379      {
1380      *limit = mid;
1381    
1382      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1383        use_offsets, use_size_offsets);
1384    
1385      if (count == errnumber)
1386        {
1387        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1388        min = mid;
1389        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1390        }
1391    
1392      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1393                             count == PCRE_ERROR_PARTIAL)
1394        {
1395        if (mid == min + 1)
1396          {
1397          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1398          break;
1399          }
1400        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1401        max = mid;
1402        mid = (min + mid)/2;
1403        }
1404      else break;    /* Some other error */
1405      }
1406    
1407    extra->flags &= ~flag;
1408    return count;
1409    }
1410    
1411    
1412    
1413    /*************************************************
1414    *         Case-independent strncmp() function    *
1415    *************************************************/
1416    
1417    /*
1418    Arguments:
1419      s         first string
1420      t         second string
1421      n         number of characters to compare
1422    
1423    Returns:    < 0, = 0, or > 0, according to the comparison
1424    */
1425    
/* Compare at most n characters of two zero-terminated strings, ignoring case
(as determined by tolower() in the current locale). Like strncmp(), the
comparison stops at the first difference or when both strings end, so memory
beyond a shared terminator is never read. A count of zero or less compares
nothing and reports equality.

Arguments:
  s         first string
  t         second string
  n         maximum number of characters to compare

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;    /* Both strings ended at the same point */
  }
return 0;
}
1436    
1437    
1438    
1439    /*************************************************
1440    *         Check newline indicator                *
1441    *************************************************/
1442    
1443    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1444    a message and return 0 if there is no match.
1445    
1446    Arguments:
1447      p           points after the leading '<'
1448      f           file for error message
1449    
1450    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1451    */
1452    
1453    static int
1454    check_newline(pcre_uint8 *p, FILE *f)
1455    {
1456    if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1457    if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1458    if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1459    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1460    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1461    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1462    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1463    fprintf(f, "Unknown newline type at: <%s\n", p);
1464    return 0;
1465    }
1466    
1467    
1468    
1469    /*************************************************
1470    *             Usage function                     *
1471    *************************************************/
1472    
/* Write the command-line help text to stdout. Lines for the readline note,
the 16-bit interface, DFA matching, and the POSIX interface depend on how
the program was built. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs("  -16      use 16-bit interface\n", stdout);
#endif

fputs("  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       force each pattern to be studied at basic level\n"
      "  -s+      force each pattern to be studied, using JIT if available\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
1510    
1511    
1512    
1513    /*************************************************
1514    *                Main Program                    *
1515    *************************************************/
1516    
1517    /* Read lines from named file or stdin and write to named file or stdout; lines
1518    consist of a regular expression, in delimiters and optionally followed by
1519    options, followed by a set of test data, terminated by an empty line. */
1520    
1521    int main(int argc, char **argv)
1522    {
1523    FILE *infile = stdin;
1524    int options = 0;
1525    int study_options = 0;
1526    int default_find_match_limit = FALSE;
1527    int op = 1;
1528    int timeit = 0;
1529    int timeitm = 0;
1530    int showinfo = 0;
1531    int showstore = 0;
1532    int force_study = -1;
1533    int force_study_options = 0;
1534    int quiet = 0;
1535    int size_offsets = 45;
1536    int size_offsets_max;
1537    int *offsets = NULL;
1538    #if !defined NOPOSIX
1539    int posix = 0;
1540    #endif
1541    int debug = 0;
1542    int done = 0;
1543    int all_use_dfa = 0;
1544    int yield = 0;
1545    int stack_size;
1546    
1547    pcre_jit_stack *jit_stack = NULL;
1548    
1549    /* These vectors store, end-to-end, a list of captured substring names. Assume
1550    that 1024 is plenty long enough for the few names we'll be testing. */
1551    
1552    pcre_uchar copynames[1024];
1553    pcre_uchar getnames[1024];
1554    
1555    pcre_uchar *copynamesptr;
1556    pcre_uchar *getnamesptr;
1557    
1558    /* Get buffers from malloc() so that valgrind will check their misuse when
1559    debugging. They grow automatically when very long lines are read. The 16-bit
1560    buffer (buffer16) is obtained only if needed. */
1561    
1562    buffer = (pcre_uint8 *)malloc(buffer_size);
1563    dbuffer = (pcre_uint8 *)malloc(buffer_size);
1564    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1565    
1566    /* The outfile variable is static so that new_malloc can use it. */
1567    
1568    outfile = stdout;
1569    
1570    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1571    library to translate CRLF into a single LF character. At least, that's what
1572    I've been told: never having used Windows I take this all on trust. Originally
1573    it set 0x8000, but then I was advised that _O_BINARY was better. */
1574    
1575    #if defined(_WIN32) || defined(WIN32)
1576    _setmode( _fileno( stdout ), _O_BINARY );
1577    #endif
1578    
1579    /* Scan options */
1580    
1581    while (argc > 1 && argv[op][0] == '-')
1582      {
1583      pcre_uint8 *endptr;
1584    
1585      if (strcmp(argv[op], "-m") == 0) showstore = 1;
1586      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1587      else if (strcmp(argv[op], "-s+") == 0)
1588        {
1589        force_study = 1;
1590        force_study_options = PCRE_STUDY_JIT_COMPILE;
1591        }
1592    #ifdef SUPPORT_PCRE16
1593      else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1594    #endif
1595    
1596      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1597      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1598      else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1599      else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1600      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1601    #if !defined NODFA
1602      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1603    #endif
1604      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1605          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1606            *endptr == 0))
1607        {
1608        op++;
1609        argc--;
1610        }
1611      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1612        {
1613        int both = argv[op][2] == 0;
1614        int temp;
1615        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1616                         *endptr == 0))
1617          {
1618          timeitm = temp;
1619          op++;
1620          argc--;
1621          }
1622        else timeitm = LOOPREPEAT;
1623        if (both) timeit = timeitm;
1624        }
1625      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1626          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1627            *endptr == 0))
1628        {
1629    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1630        printf("PCRE: -S not supported on this OS\n");
1631        exit(1);
1632    #else
1633        int rc;
1634        struct rlimit rlim;
1635        getrlimit(RLIMIT_STACK, &rlim);
1636        rlim.rlim_cur = stack_size * 1024 * 1024;
1637        rc = setrlimit(RLIMIT_STACK, &rlim);
1638        if (rc != 0)
1639          {
1640        printf("PCRE: setrlimit() failed with error %d\n", rc);
1641        exit(1);
1642          }
1643        op++;
1644        argc--;
1645    #endif
1646        }
1647    #if !defined NOPOSIX
1648      else if (strcmp(argv[op], "-p") == 0) posix = 1;
1649    #endif
1650      else if (strcmp(argv[op], "-C") == 0)
1651        {
1652        int rc;
1653        unsigned long int lrc;
1654        printf("PCRE version %s\n", pcre_version());
1655        printf("Compiled with\n");
1656    
1657    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1658    are set, either both UTFs are supported or both are not supported. */
1659    
1660    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1661        printf("  8-bit and 16-bit support\n");
1662        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1663        if (rc)
1664          printf("  UTF-8 and UTF-16 support\n");
1665        else
1666          printf("  No UTF-8 or UTF-16 support\n");
1667    #elif defined SUPPORT_PCRE8
1668        printf("  8-bit support only\n");
1669        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1670        printf("  %sUTF-8 support\n", rc? "" : "No ");
1671    #else
1672        printf("  16-bit support only\n");
1673        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1674        printf("  %sUTF-16 support\n", rc? "" : "No ");
1675    #endif
1676    
1677        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1678        printf("  %sUnicode properties support\n", rc? "" : "No ");
1679        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1680        if (rc)
1681          printf("  Just-in-time compiler support\n");
1682        else
1683          printf("  No just-in-time compiler support\n");
1684        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1685        /* Note that these values are always the ASCII values, even
1686        in EBCDIC environments. CR is 13 and NL is 10. */
1687        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1688          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1689          (rc == -2)? "ANYCRLF" :
1690          (rc == -1)? "ANY" : "???");
1691        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1692        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1693                                         "all Unicode newlines");
1694        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1695        printf("  Internal link size = %d\n", rc);
1696        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1697        printf("  POSIX malloc threshold = %d\n", rc);
1698        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1699        printf("  Default match limit = %ld\n", lrc);
1700        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1701        printf("  Default recursion depth limit = %ld\n", lrc);
1702        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1703        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1704        goto EXIT;
1705        }
1706      else if (strcmp(argv[op], "-help") == 0 ||
1707               strcmp(argv[op], "--help") == 0)
1708        {
1709        usage();
1710        goto EXIT;
1711        }
1712      else
1713        {
1714        printf("** Unknown or malformed option %s\n", argv[op]);
1715        usage();
1716        yield = 1;
1717        goto EXIT;
1718        }
1719      op++;
1720      argc--;
1721      }
1722    
1723    /* Get the store for the offsets vector, and remember what it was */
1724    
1725    size_offsets_max = size_offsets;
1726    offsets = (int *)malloc(size_offsets_max * sizeof(int));
1727    if (offsets == NULL)
1728      {
1729      printf("** Failed to get %d bytes of memory for offsets vector\n",
1730        (int)(size_offsets_max * sizeof(int)));
1731      yield = 1;
1732      goto EXIT;
1733      }
1734    
1735    /* Sort out the input and output files */
1736    
1737    if (argc > 1)
1738      {
1739      infile = fopen(argv[op], INPUT_MODE);
1740      if (infile == NULL)
1741        {
1742        printf("** Failed to open %s\n", argv[op]);
1743        yield = 1;
1744        goto EXIT;
1745        }
1746      }
1747    
1748    if (argc > 2)
1749      {
1750      outfile = fopen(argv[op+1], OUTPUT_MODE);
1751      if (outfile == NULL)
1752        {
1753        printf("** Failed to open %s\n", argv[op+1]);
1754        yield = 1;
1755        goto EXIT;
1756        }
1757      }
1758    
1759    /* Set alternative malloc function */
1760    
1761    #ifdef SUPPORT_PCRE8
1762    pcre_malloc = new_malloc;
1763    pcre_free = new_free;
1764    pcre_stack_malloc = stack_malloc;
1765    pcre_stack_free = stack_free;
1766    #endif
1767    
1768    #ifdef SUPPORT_PCRE16
1769    pcre16_malloc = new_malloc;
1770    pcre16_free = new_free;
1771    pcre16_stack_malloc = stack_malloc;
1772    pcre16_stack_free = stack_free;
1773    #endif
1774    
1775    /* Heading line unless quiet, then prompt for first regex if stdin */
1776    
1777    if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1778    
1779    /* Main loop */
1780    
1781    while (!done)
1782      {
1783      pcre *re = NULL;
1784      pcre_extra *extra = NULL;
1785    
1786    #if !defined NOPOSIX  /* There are still compilers that require no indent */
1787      regex_t preg;
1788      int do_posix = 0;
1789    #endif
1790    
1791      const char *error;
1792      pcre_uint8 *markptr;
1793      pcre_uint8 *p, *pp, *ppp;
1794      pcre_uint8 *to_file = NULL;
1795      const pcre_uint8 *tables = NULL;
1796      unsigned long int true_size, true_study_size = 0;
1797      size_t size, regex_gotten_store;
1798      int do_allcaps = 0;
1799      int do_mark = 0;
1800      int do_study = 0;
1801      int no_force_study = 0;
1802      int do_debug = debug;
1803      int do_G = 0;
1804      int do_g = 0;
1805      int do_showinfo = showinfo;
1806      int do_showrest = 0;
1807      int do_showcaprest = 0;
1808      int do_flip = 0;
1809      int erroroffset, len, delimiter, poffset;
1810    
1811      use_utf = 0;
1812      debug_lengths = 1;
1813    
1814      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
1815      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1816      fflush(outfile);
1817    
1818      p = buffer;
1819      while (isspace(*p)) p++;
1820      if (*p == 0) continue;
1821    
1822      /* See if the pattern is to be loaded pre-compiled from a file. */
1823    
1824      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1825        {
1826        unsigned long int magic, get_options;
1827        pcre_uint8 sbuf[8];
1828        FILE *f;
1829    
1830        p++;
1831        pp = p + (int)strlen((char *)p);
1832        while (isspace(pp[-1])) pp--;
1833        *pp = 0;
1834    
1835        f = fopen((char *)p, "rb");
1836        if (f == NULL)
1837          {
1838          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1839          continue;
1840          }
1841    
1842        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1843    
1844        true_size =
1845          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1846        true_study_size =
1847          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1848    
1849        re = (real_pcre *)new_malloc(true_size);
1850        regex_gotten_store = first_gotten_store;
1851    
1852        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1853    
1854        magic = ((real_pcre *)re)->magic_number;
1855        if (magic != MAGIC_NUMBER)
1856          {
1857          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1858            {
1859            do_flip = 1;
1860            }
1861          else
1862            {
1863            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1864            fclose(f);
1865            continue;
1866            }
1867          }
1868    
1869        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1870          do_flip? " (byte-inverted)" : "", p);
1871    
1872        /* Now see if there is any following study data. */
1873    
1874        if (true_study_size != 0)
1875          {
1876          pcre_study_data *psd;
1877    
1878          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1879          extra->flags = PCRE_EXTRA_STUDY_DATA;
1880    
1881          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1882          extra->study_data = psd;
1883    
1884          if (fread(psd, 1, true_study_size, f) != true_study_size)
1885            {
1886            FAIL_READ:
1887            fprintf(outfile, "Failed to read data from %s\n", p);
1888            if (extra != NULL)
1889              {
1890              PCRE_FREE_STUDY(extra);
1891              }
1892            if (re != NULL) new_free(re);
1893            fclose(f);
1894            continue;
1895            }
1896          fprintf(outfile, "Study data loaded from %s\n", p);
1897          do_study = 1;     /* To get the data output if requested */
1898          }
1899        else fprintf(outfile, "No study data\n");
1900    
1901        /* Flip the necessary bytes. */
1902        if (do_flip != 0)
1903          {
1904          PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
1905          }
1906    
1907        /* Need to know if UTF-8 for printing data strings */
1908    
1909        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1910        use_utf = (get_options & PCRE_UTF8) != 0;
1911    
1912        fclose(f);
1913        goto SHOW_INFO;
1914        }
1915    
1916      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1917      the pattern; if it isn't complete, read more. */
1918    
1919      delimiter = *p++;
1920    
1921      if (isalnum(delimiter) || delimiter == '\\')
1922        {
1923        fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1924        goto SKIP_DATA;
1925        }
1926    
1927      pp = p;
1928      poffset = (int)(p - buffer);
1929    
1930      for(;;)
1931        {
1932        while (*pp != 0)
1933          {
1934          if (*pp == '\\' && pp[1] != 0) pp++;
1935            else if (*pp == delimiter) break;
1936          pp++;
1937          }
1938        if (*pp != 0) break;
1939        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
1940          {
1941          fprintf(outfile, "** Unexpected EOF\n");
1942          done = 1;
1943          goto CONTINUE;
1944          }
1945        if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1946        }
1947    
1948      /* The buffer may have moved while being extended; reset the start of data
1949      pointer to the correct relative point in the buffer. */
1950    
1951      p = buffer + poffset;
1952    
1953      /* If the first character after the delimiter is backslash, make
1954      the pattern end with backslash. This is purely to provide a way
1955      of testing for the error message when a pattern ends with backslash. */
1956    
1957      if (pp[1] == '\\') *pp++ = '\\';
1958    
1959      /* Terminate the pattern at the delimiter, and save a copy of the pattern
1960      for callouts. */
1961    
1962      *pp++ = 0;
1963      strcpy((char *)pbuffer, (char *)p);
1964    
1965      /* Look for options after final delimiter */
1966    
1967      options = 0;
1968      study_options = 0;
1969      log_store = showstore;  /* default from command line */
1970    
1971      while (*pp != 0)
1972        {
1973        switch (*pp++)
1974          {
1975          case 'f': options |= PCRE_FIRSTLINE; break;
1976          case 'g': do_g = 1; break;
1977          case 'i': options |= PCRE_CASELESS; break;
1978          case 'm': options |= PCRE_MULTILINE; break;
1979          case 's': options |= PCRE_DOTALL; break;
1980          case 'x': options |= PCRE_EXTENDED; break;
1981    
1982          case '+':
1983          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1984          break;
1985    
1986          case '=': do_allcaps = 1; break;
1987          case 'A': options |= PCRE_ANCHORED; break;
1988          case 'B': do_debug = 1; break;
1989          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1990          case 'D': do_debug = do_showinfo = 1; break;
1991          case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1992          case 'F': do_flip = 1; break;
1993          case 'G': do_G = 1; break;
1994          case 'I': do_showinfo = 1; break;
1995          case 'J': options |= PCRE_DUPNAMES; break;
1996          case 'K': do_mark = 1; break;
1997          case 'M': log_store = 1; break;
1998          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1999    
2000    #if !defined NOPOSIX
2001          case 'P': do_posix = 1; break;
2002    #endif
2003    
2004          case 'S':
2005          if (do_study == 0)
2006            {
2007            do_study = 1;
2008            if (*pp == '+')
2009              {
2010              study_options |= PCRE_STUDY_JIT_COMPILE;
2011              pp++;
2012              }
2013            }
2014          else
2015            {
2016            do_study = 0;
2017            no_force_study = 1;
2018            }
2019          break;
2020    
2021          case 'U': options |= PCRE_UNGREEDY; break;
2022          case 'W': options |= PCRE_UCP; break;
2023          case 'X': options |= PCRE_EXTRA; break;
2024          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2025          case 'Z': debug_lengths = 0; break;
2026          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2027          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2028    
2029          case 'T':
2030          switch (*pp++)
2031            {
2032            case '0': tables = tables0; break;
2033            case '1': tables = tables1; break;
2034    
2035            case '\r':
2036            case '\n':
2037            case ' ':
2038            case 0:
2039            fprintf(outfile, "** Missing table number after /T\n");
2040            goto SKIP_DATA;
2041    
2042            default:
2043            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2044            goto SKIP_DATA;
2045            }
2046          break;
2047    
2048          case 'L':
2049          ppp = pp;
2050          /* The '\r' test here is so that it works on Windows. */
2051          /* The '0' test is just in case this is an unterminated line. */
2052          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2053          *ppp = 0;
2054          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2055            {
2056            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2057            goto SKIP_DATA;
2058            }
2059          locale_set = 1;
2060          tables = pcre_maketables();
2061          pp = ppp;
2062          break;
2063    
2064          case '>':
2065          to_file = pp;
2066          while (*pp != 0) pp++;
2067          while (isspace(pp[-1])) pp--;
2068          *pp = 0;
2069          break;
2070    
2071          case '<':
2072            {
2073            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2074              {
2075              options |= PCRE_JAVASCRIPT_COMPAT;
2076              pp += 3;
2077              }
2078            else
2079              {
2080              int x = check_newline(pp, outfile);
2081              if (x == 0) goto SKIP_DATA;
2082              options |= x;
2083              while (*pp++ != '>');
2084              }
2085            }
2086          break;
2087    
2088          case '\r':                      /* So that it works in Windows */
2089          case '\n':
2090          case ' ':
2091          break;
2092    
2093          default:
2094          fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2095          goto SKIP_DATA;
2096          }
2097        }
2098    
2099      /* Handle compiling via the POSIX interface, which doesn't support the
2100      timing, showing, or debugging options, nor the ability to pass over
2101      local character tables. Neither does it have 16-bit support. */
2102    
2103      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);  #if !defined NOPOSIX
2104      if (posix || do_posix)
2105        {
2106        int rc;
2107        int cflags = 0;
2108    
2109        if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2110        if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2111        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2112        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2113        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2114        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2115        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2116    
2117        first_gotten_store = 0;
2118        rc = regcomp(&preg, (char *)p, cflags);
2119    
2120        /* Compilation failed; go back for another re, skipping to blank line
2121        if non-interactive. */
2122    
2123        if (rc != 0)
2124          {
2125          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2126          fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2127          goto SKIP_DATA;
2128          }
2129        }
2130    
2131      /* Handle compiling via the native interface */
2132    
2133      else
2134    #endif  /* !defined NOPOSIX */
2135    
2136        {
2137        unsigned long int get_options;
2138    
2139        /* In 16-bit mode, convert the input. */
2140    
2141    #ifdef SUPPORT_PCRE16
2142        if (use_pcre16)
2143          {
2144          if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2145            {
2146            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2147              "converted to UTF-16\n");
2148            goto SKIP_DATA;
2149            }
2150          p = (pcre_uint8 *)buffer16;
2151          }
2152    #endif
2153    
2154        /* Compile many times when timing */
2155    
2156        if (timeit > 0)
2157          {
2158          register int i;
2159          clock_t time_taken;
2160          clock_t start_time = clock();
2161          for (i = 0; i < timeit; i++)
2162            {
2163            PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2164            if (re != NULL) free(re);
2165            }
2166          time_taken = clock() - start_time;
2167          fprintf(outfile, "Compile time %.4f milliseconds\n",
2168            (((double)time_taken * 1000.0) / (double)timeit) /
2169              (double)CLOCKS_PER_SEC);
2170          }
2171    
2172        first_gotten_store = 0;
2173        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2174    
2175      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2176      if non-interactive. */      if non-interactive. */
# Line 517  while (!done) Line 2183  while (!done)
2183          {          {
2184          for (;;)          for (;;)
2185            {            {
2186            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2187              {              {
2188              done = 1;              done = 1;
2189              goto CONTINUE;              goto CONTINUE;
# Line 531  while (!done) Line 2197  while (!done)
2197        goto CONTINUE;        goto CONTINUE;
2198        }        }
2199    
2200      /* Compilation succeeded; print data if required */      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2201        within the regex; check for this so that we know how to process the data
2202        lines. */
2203    
2204      if (do_showinfo)      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2205        {      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
       int first_char, count;  
2206    
2207        if (do_debug) print_internals(re, outfile);      /* Extract the size for possible writing before possibly flipping it,
2208        and remember the store that was got. */
2209    
2210        count = pcre_info(re, &options, &first_char);      true_size = ((real_pcre *)re)->size;
2211        if (count < 0) fprintf(outfile,      regex_gotten_store = first_gotten_store;
2212          "Error %d while reading info\n", count);  
2213        else      /* Output code size information if requested */
2214    
2215        if (log_store)
2216          fprintf(outfile, "Memory allocation (code space): %d\n",
2217            (int)(first_gotten_store -
2218                  sizeof(real_pcre) -
2219                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2220    
2221        /* If -s or /S was present, study the regex to generate additional info to
2222        help with the matching, unless the pattern has the SS option, which
2223        suppresses the effect of /S (used for a few test patterns where studying is
2224        never sensible). */
2225    
2226        if (do_study || (force_study >= 0 && !no_force_study))
2227          {
2228          if (timeit > 0)
2229          {          {
2230          fprintf(outfile, "Identifying subpattern count = %d\n", count);          register int i;
2231          if (options == 0) fprintf(outfile, "No options\n");          clock_t time_taken;
2232            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",          clock_t start_time = clock();
2233              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",          for (i = 0; i < timeit; i++)
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "",  
             ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
         if (first_char == -1)  
2234            {            {
2235            fprintf(outfile, "First char at start or follows \\n\n");            PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2236            }            }
2237          else if (first_char < 0)          time_taken = clock() - start_time;
2238            if (extra != NULL)
2239            {            {
2240            fprintf(outfile, "No first char\n");            PCRE_FREE_STUDY(extra);
2241            }            }
2242          else          fprintf(outfile, "  Study time %.4f milliseconds\n",
2243              (((double)time_taken * 1000.0) / (double)timeit) /
2244                (double)CLOCKS_PER_SEC);
2245            }
2246          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2247          if (error != NULL)
2248            fprintf(outfile, "Failed to study: %s\n", error);
2249          else if (extra != NULL)
2250            {
2251            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2252            if (log_store)
2253            {            {
2254            if (isprint(first_char))            size_t jitsize;
2255              fprintf(outfile, "First char = \'%c\'\n", first_char);            new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2256            else            if (jitsize != 0)
2257              fprintf(outfile, "First char = %d\n", first_char);              fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2258            }            }
2259          }          }
2260        }        }
2261    
2262      /* If /S was present, study the regexp to generate additional info to      /* If /K was present, we set up for handling MARK data. */
     help with the matching. */  
2263    
2264      if (do_study)      if (do_mark)
2265        {        {
2266        if (timeit)        if (extra == NULL)
2267          {          {
2268          register int i;          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2269          clock_t time_taken;          extra->flags = 0;
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
2270          }          }
2271          extra->mark = &markptr;
2272          extra->flags |= PCRE_EXTRA_MARK;
2273          }
2274    
2275        extra = pcre_study(re, study_options, &error);      /* Extract and display information from the compiled data if required. */
2276        if (error != NULL)  
2277          fprintf(outfile, "Failed to study: %s\n", error);      SHOW_INFO:
2278        else if (extra == NULL)  
2279          fprintf(outfile, "Study returned NULL\n");      if (do_debug)
2280          {
2281          fprintf(outfile, "------------------------------------------------------------------\n");
2282    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2283          if (use_pcre16)
2284            pcre16_printint(re, outfile, debug_lengths);
2285          else
2286            pcre_printint(re, outfile, debug_lengths);
2287    #elif defined SUPPORT_PCRE8
2288          pcre_printint(re, outfile, debug_lengths);
2289    #else
2290          pcre16_printint(re, outfile, debug_lengths);
2291    #endif
2292          }
2293    
2294        /* We already have the options in get_options (see above) */
2295    
2296        if (do_showinfo)
2297          {
2298          unsigned long int all_options;
2299    #if !defined NOINFOCHECK
2300          int old_first_char, old_options, old_count;
2301    #endif
2302          int count, backrefmax, first_char, need_char, okpartial, jchanged,
2303            hascrorlf;
2304          int nameentrysize, namecount;
2305          const pcre_uchar *nametable;
2306    
2307          new_info(re, NULL, PCRE_INFO_SIZE, &size);
2308          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2309          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2310          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2311          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2312          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2313          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2314          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2315          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2316          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2317          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2318    
2319          /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2320          that it gives the same results as the new function. */
2321    
2322    #if !defined NOINFOCHECK
2323          if (!use_pcre16)
2324            {
2325            old_count = pcre_info(re, &old_options, &old_first_char);
2326            if (count < 0) fprintf(outfile,
2327              "Error %d from pcre_info()\n", count);
2328            else
2329              {
2330              if (old_count != count) fprintf(outfile,
2331                "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2332                  old_count);
2333    
2334              if (old_first_char != first_char) fprintf(outfile,
2335                "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2336                  first_char, old_first_char);
2337    
2338              if (old_options != (int)get_options) fprintf(outfile,
2339                "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2340                  get_options, old_options);
2341              }
2342            }
2343    #endif
2344    
2345          if (size != regex_gotten_store) fprintf(outfile,
2346            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2347            (int)size, (int)regex_gotten_store);
2348    
2349          fprintf(outfile, "Capturing subpattern count = %d\n", count);
2350          if (backrefmax > 0)
2351            fprintf(outfile, "Max back reference = %d\n", backrefmax);
2352    
2353          if (namecount > 0)
2354            {
2355            fprintf(outfile, "Named capturing subpatterns:\n");
2356            while (namecount-- > 0)
2357              {
2358              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
2359                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2360                GET2(nametable, 0));
2361              nametable += nameentrysize;
2362              }
2363            }
2364    
2365          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2366          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2367    
2368          all_options = ((real_pcre *)re)->options;
2369          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2370    
2371          if (get_options == 0) fprintf(outfile, "No options\n");
2372            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2373              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2374              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2375              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2376              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2377              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2378              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2379              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2380              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2381              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2382              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2383              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2384              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2385              ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2386              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2387              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2388              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2389              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2390    
2391          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2392    
2393          switch (get_options & PCRE_NEWLINE_BITS)
2394            {
2395            case PCRE_NEWLINE_CR:
2396            fprintf(outfile, "Forced newline sequence: CR\n");
2397            break;
2398    
2399            case PCRE_NEWLINE_LF:
2400            fprintf(outfile, "Forced newline sequence: LF\n");
2401            break;
2402    
2403            case PCRE_NEWLINE_CRLF:
2404            fprintf(outfile, "Forced newline sequence: CRLF\n");
2405            break;
2406    
2407            case PCRE_NEWLINE_ANYCRLF:
2408            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2409            break;
2410    
2411            case PCRE_NEWLINE_ANY:
2412            fprintf(outfile, "Forced newline sequence: ANY\n");
2413            break;
2414    
2415            default:
2416            break;
2417            }
2418    
2419          if (first_char == -1)
2420            {
2421            fprintf(outfile, "First char at start or follows newline\n");
2422            }
2423          else if (first_char < 0)
2424            {
2425            fprintf(outfile, "No first char\n");
2426            }
2427          else
2428            {
2429            const char *caseless =
2430              ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2431              "" : " (caseless)";
2432    
2433            if (PRINTOK(first_char))
2434              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2435            else
2436              {
2437              fprintf(outfile, "First char = ");
2438              pchar(first_char, outfile);
2439              fprintf(outfile, "%s\n", caseless);
2440              }
2441            }
2442    
2443          if (need_char < 0)
2444            {
2445            fprintf(outfile, "No need char\n");
2446            }
2447          else
2448            {
2449            const char *caseless =
2450              ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2451              "" : " (caseless)";
2452    
2453            if (PRINTOK(need_char))
2454              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2455            else
2456              fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2457            }
2458    
2459        /* This looks at internal information. A bit kludgy to do it this        /* Don't output study size; at present it is in any case a fixed
2460        way, but it is useful for testing. */        value, but it varies, depending on the computer architecture, and
2461          so messes up the test suite. (And with the /F option, it might be
2462          flipped.) If study was forced by an external -s, don't show this
2463          information unless -i or -d was also present. This means that, except
2464          when auto-callouts are involved, the output from runs with and without
2465          -s should be identical. */
2466    
2467        else if (do_showinfo)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2468          {          {
2469          real_pcre_extra *xx = (real_pcre_extra *)extra;          if (extra == NULL)
2470          if ((xx->options & PCRE_STUDY_MAPPED) == 0)            fprintf(outfile, "Study returned NULL\n");
           fprintf(outfile, "No starting character set\n");  
2471          else          else
2472            {            {
2473            int i;            pcre_uint8 *start_bits = NULL;
2474            int c = 24;            int minlength;
2475            fprintf(outfile, "Starting character set: ");  
2476            for (i = 0; i < 256; i++)            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2477              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2478    
2479              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2480              if (start_bits == NULL)
2481                fprintf(outfile, "No set of starting bytes\n");
2482              else
2483              {              {
2484              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              int i;
2485                int c = 24;
2486                fprintf(outfile, "Starting byte set: ");
2487                for (i = 0; i < 256; i++)
2488                {                {
2489                if (c > 75)                if ((start_bits[i/8] & (1<<(i&7))) != 0)
                 {  
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
2490                  {                  {
2491                  fprintf(outfile, "\\x%02x ", i);                  if (c > 75)
2492                  c += 5;                    {
2493                      fprintf(outfile, "\n  ");
2494                      c = 2;
2495                      }
2496                    if (PRINTOK(i) && i != ' ')
2497                      {
2498                      fprintf(outfile, "%c ", i);
2499                      c += 2;
2500                      }
2501                    else
2502                      {
2503                      fprintf(outfile, "\\x%02x ", i);
2504                      c += 5;
2505                      }
2506                  }                  }
2507                }                }
2508                fprintf(outfile, "\n");
2509              }              }
2510            fprintf(outfile, "\n");            }
2511    
2512            /* Show this only if the JIT was set by /S, not by -s. */
2513    
2514            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2515              {
2516              int jit;
2517              new_info(re, extra, PCRE_INFO_JIT, &jit);
2518              if (jit)
2519                fprintf(outfile, "JIT study was successful\n");
2520              else
2521    #ifdef SUPPORT_JIT
2522                fprintf(outfile, "JIT study was not successful\n");
2523    #else
2524                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2525    #endif
2526            }            }
2527          }          }
2528        }        }
2529      }  
2530        /* If the '>' option was present, we write out the regex to a file, and
2531        that is all. The first 8 bytes of the file are the regex length and then
2532        the study length, in big-endian order. */
2533    
2534        if (to_file != NULL)
2535          {
2536          /* If the 'F' option was present, we flip the bytes of all the integer
2537          fields in the regex data block and the study block. This is to make it
2538          possible to test PCRE's handling of byte-flipped patterns, e.g. those
2539          compiled on a different architecture. */
2540    
2541          if (do_flip)
2542            {
2543            real_pcre *rre = (real_pcre *)re;
2544            rre->magic_number =
2545              byteflip(rre->magic_number, sizeof(rre->magic_number));
2546            rre->size = byteflip(rre->size, sizeof(rre->size));
2547            rre->options = byteflip(rre->options, sizeof(rre->options));
2548            rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2549            rre->top_bracket =
2550              (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2551            rre->top_backref =
2552              (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2553            rre->first_char =
2554              (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2555            rre->req_char =
2556              (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2557            rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2558              sizeof(rre->name_table_offset));
2559            rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2560              sizeof(rre->name_entry_size));
2561            rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2562              sizeof(rre->name_count));
2563    
2564            if (extra != NULL)
2565              {
2566              pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2567              rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2568              rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2569              rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2570              }
2571            }
2572    
2573          FILE *f = fopen((char *)to_file, "wb");
2574          if (f == NULL)
2575            {
2576            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2577            }
2578          else
2579            {
2580            pcre_uint8 sbuf[8];
2581            sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2582            sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2583            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2584            sbuf[3] = (pcre_uint8)((true_size) & 255);
2585    
2586            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2587            sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2588            sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2589            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2590    
2591            if (fwrite(sbuf, 1, 8, f) < 8 ||
2592                fwrite(re, 1, true_size, f) < true_size)
2593              {
2594              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2595              }
2596            else
2597              {
2598              fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2599    
2600              /* If there is study data, write it. */
2601    
2602              if (extra != NULL)
2603                {
2604                if (fwrite(extra->study_data, 1, true_study_size, f) <
2605                    true_study_size)
2606                  {
2607                  fprintf(outfile, "Write error on %s: %s\n", to_file,
2608                    strerror(errno));
2609                  }
2610                else fprintf(outfile, "Study data written to %s\n", to_file);
2611                }
2612              }
2613            fclose(f);
2614            }
2615    
2616          new_free(re);
2617          if (extra != NULL)
2618            {
2619            PCRE_FREE_STUDY(extra);
2620            }
2621          if (locale_set)
2622            {
2623            new_free((void *)tables);
2624            setlocale(LC_CTYPE, "C");
2625            locale_set = 0;
2626            }
2627          continue;  /* With next regex */
2628          }
2629        }        /* End of non-POSIX compile */
2630    
2631    /* Read data lines and test them */    /* Read data lines and test them */
2632    
2633    for (;;)    for (;;)
2634      {      {
2635      unsigned char *q;      pcre_uint8 *q;
2636        pcre_uint8 *bptr;
2637        int *use_offsets = offsets;
2638        int use_size_offsets = size_offsets;
2639        int callout_data = 0;
2640        int callout_data_set = 0;
2641      int count, c;      int count, c;
2642      int offsets[45];      int copystrings = 0;
2643      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = default_find_match_limit;
2644        int getstrings = 0;
2645        int getlist = 0;
2646        int gmatched = 0;
2647        int start_offset = 0;
2648        int start_offset_sign = 1;
2649        int g_notempty = 0;
2650        int use_dfa = 0;
2651    
2652      options = 0;      options = 0;
2653    
2654      if (infile == stdin) printf("  data> ");      *copynames = 0;
2655      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
2656    
2657        copynamesptr = copynames;
2658        getnamesptr = getnames;
2659    
2660        pcre_callout = callout;
2661        first_callout = 1;
2662        last_callout_mark = NULL;
2663        callout_extra = 0;
2664        callout_count = 0;
2665        callout_fail_count = 999999;
2666        callout_fail_id = -1;
2667        show_malloc = 0;
2668    
2669        if (extra != NULL) extra->flags &=
2670          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2671    
2672        len = 0;
2673        for (;;)
2674        {        {
2675        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2676        goto CONTINUE;          {
2677            if (len > 0)    /* Reached EOF without hitting a newline */
2678              {
2679              fprintf(outfile, "\n");
2680              break;
2681              }
2682            done = 1;
2683            goto CONTINUE;
2684            }
2685          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2686          len = (int)strlen((char *)buffer);
2687          if (buffer[len-1] == '\n') break;
2688        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2689    
     len = (int)strlen((char *)buffer);  
2690      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2691      buffer[len] = 0;      buffer[len] = 0;
2692      if (len == 0) break;      if (len == 0) break;
# Line 665  while (!done) Line 2694  while (!done)
2694      p = buffer;      p = buffer;
2695      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2696    
2697      q = dbuffer;      bptr = q = dbuffer;
2698      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2699        {        {
2700        int i = 0;        int i = 0;
2701        int n = 0;        int n = 0;
2702    
2703        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
2704          {          {
2705          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 686  while (!done) Line 2716  while (!done)
2716          c -= '0';          c -= '0';
2717          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2718            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2719    
2720    #if !defined NOUTF8
2721            if (use_utf && c > 255)
2722              {
2723              pcre_uint8 buff8[8];
2724              int ii, utn;
2725              utn = ord2utf8(c, buff8);
2726              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2727              c = buff8[ii];   /* Last byte */
2728              }
2729    #endif
2730          break;          break;
2731    
2732          case 'x':          case 'x':
2733    
2734            /* Handle \x{..} specially - new Perl thing for utf8 */
2735    
2736    #if !defined NOUTF8
2737            if (*p == '{')
2738              {
2739              pcre_uint8 *pt = p;
2740              c = 0;
2741    
2742              /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2743              when isxdigit() is a macro that refers to its argument more than
2744              once. This is banned by the C Standard, but apparently happens in at
2745              least one MacOS environment. */
2746    
2747              for (pt++; isxdigit(*pt); pt++)
2748                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2749              if (*pt == '}')
2750                {
2751                pcre_uint8 buff8[8];
2752                int ii, utn;
2753                if (use_utf)
2754                  {
2755                  utn = ord2utf8(c, buff8);
2756                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2757                  c = buff8[ii];   /* Last byte */
2758                  }
2759                else
2760                 {
2761                 if (c > 255)
2762                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2763                     "UTF-8 mode is not enabled.\n"
2764                     "** Truncation will probably give the wrong result.\n", c);
2765                 }
2766                p = pt + 1;
2767                break;
2768                }
2769              /* Not correct form; fall through */
2770              }
2771    #endif
2772    
2773            /* Ordinary \x */
2774    
2775          c = 0;          c = 0;
2776          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2777            {            {
2778            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2779            p++;            p++;
2780            }            }
2781          break;          break;
2782    
2783          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
2784          p--;          p--;
2785          continue;          continue;
2786    
2787            case '>':
2788            if (*p == '-')
2789              {
2790              start_offset_sign = -1;
2791              p++;
2792              }
2793            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2794            start_offset *= start_offset_sign;
2795            continue;
2796    
2797          case 'A':  /* Option setting */          case 'A':  /* Option setting */
2798          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
2799          continue;          continue;
# Line 709  while (!done) Line 2802  while (!done)
2802          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
2803          continue;          continue;
2804    
2805            case 'C':
2806            if (isdigit(*p))    /* Set copy string */
2807              {
2808              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2809              copystrings |= 1 << n;
2810              }
2811            else if (isalnum(*p))
2812              {
2813              pcre_uchar *npp = copynamesptr;
2814              while (isalnum(*p)) *npp++ = *p++;
2815              *npp++ = 0;
2816              *npp = 0;
2817              n = pcre_get_stringnumber(re, (char *)copynamesptr);
2818              if (n < 0)
2819                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2820              copynamesptr = npp;
2821              }
2822            else if (*p == '+')
2823              {
2824              callout_extra = 1;
2825              p++;
2826              }
2827            else if (*p == '-')
2828              {
2829              pcre_callout = NULL;
2830              p++;
2831              }
2832            else if (*p == '!')
2833              {
2834              callout_fail_id = 0;
2835              p++;
2836              while(isdigit(*p))
2837                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2838              callout_fail_count = 0;
2839              if (*p == '!')
2840                {
2841                p++;
2842                while(isdigit(*p))
2843                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2844                }
2845              }
2846            else if (*p == '*')
2847              {
2848              int sign = 1;
2849              callout_data = 0;
2850              if (*(++p) == '-') { sign = -1; p++; }
2851              while(isdigit(*p))
2852                callout_data = callout_data * 10 + *p++ - '0';
2853              callout_data *= sign;
2854              callout_data_set = 1;
2855              }
2856            continue;
2857    
2858    #if !defined NODFA
2859            case 'D':
2860    #if !defined NOPOSIX
2861            if (posix || do_posix)
2862              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2863            else
2864    #endif
2865              use_dfa = 1;
2866            continue;
2867    #endif
2868    
2869    #if !defined NODFA
2870            case 'F':
2871            options |= PCRE_DFA_SHORTEST;
2872            continue;
2873    #endif
2874    
2875            case 'G':
2876            if (isdigit(*p))
2877              {
2878              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2879              getstrings |= 1 << n;
2880              }
2881            else if (isalnum(*p))
2882              {
2883              pcre_uchar *npp = getnamesptr;
2884              while (isalnum(*p)) *npp++ = *p++;
2885              *npp++ = 0;
2886              *npp = 0;
2887              n = pcre_get_stringnumber(re, (char *)getnamesptr);
2888              if (n < 0)
2889                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2890              getnamesptr = npp;
2891              }
2892            continue;
2893    
2894            case 'J':
2895            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2896            if (extra != NULL
2897                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2898                && extra->executable_jit != NULL)
2899              {
2900              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2901              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2902              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2903              }
2904            continue;
2905    
2906            case 'L':
2907            getlist = 1;
2908            continue;
2909    
2910            case 'M':
2911            find_match_limit = 1;
2912            continue;
2913    
2914            case 'N':
2915            if ((options & PCRE_NOTEMPTY) != 0)
2916              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2917            else
2918              options |= PCRE_NOTEMPTY;
2919            continue;
2920    
2921          case 'O':          case 'O':
2922          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
2923          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
2924              {
2925              size_offsets_max = n;
2926              free(offsets);
2927              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2928              if (offsets == NULL)
2929                {
2930                printf("** Failed to get %d bytes of memory for offsets vector\n",
2931                  (int)(size_offsets_max * sizeof(int)));
2932                yield = 1;
2933                goto EXIT;
2934                }
2935              }
2936            use_size_offsets = n;
2937            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
2938            continue;
2939    
2940            case 'P':
2941            options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2942              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2943            continue;
2944    
2945            case 'Q':
2946            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2947            if (extra == NULL)
2948              {
2949              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2950              extra->flags = 0;
2951              }
2952            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2953            extra->match_limit_recursion = n;
2954            continue;
2955    
2956            case 'q':
2957            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2958            if (extra == NULL)
2959              {
2960              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2961              extra->flags = 0;
2962              }
2963            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2964            extra->match_limit = n;
2965            continue;
2966    
2967    #if !defined NODFA
2968            case 'R':
2969            options |= PCRE_DFA_RESTART;
2970            continue;
2971    #endif
2972    
2973            case 'S':
2974            show_malloc = 1;
2975            continue;
2976    
2977            case 'Y':
2978            options |= PCRE_NO_START_OPTIMIZE;
2979          continue;          continue;
2980    
2981          case 'Z':          case 'Z':
2982          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2983          continue;          continue;
2984    
2985            case '?':
2986            options |= PCRE_NO_UTF8_CHECK;
2987            continue;
2988    
2989            case '<':
2990              {
2991              int x = check_newline(p, outfile);
2992              if (x == 0) goto NEXT_DATA;
2993              options |= x;
2994              while (*p++ != '>');
2995              }
2996            continue;
2997          }          }
2998        *q++ = c;        *q++ = c;
2999        }        }
3000      *q = 0;      *q = 0;
3001      len = q - dbuffer;      len = (int)(q - dbuffer);
3002    
3003        /* Move the data to the end of the buffer so that a read over the end of
3004        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3005        we are using the POSIX interface, we must include the terminating zero. */
3006    
3007    #if !defined NOPOSIX
3008        if (posix || do_posix)
3009          {
3010          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3011          bptr += buffer_size - len - 1;
3012          }
3013        else
3014    #endif
3015          {
3016          memmove(bptr + buffer_size - len, bptr, len);
3017          bptr += buffer_size - len;
3018          }
3019    
3020        if ((all_use_dfa || use_dfa) && find_match_limit)
3021          {
3022          printf("**Match limit not relevant for DFA matching: ignored\n");
3023          find_match_limit = 0;
3024          }
3025    
3026      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
3027      support timing. */      support timing or playing with the match limit or callout data. */
3028    
3029    #if !defined NOPOSIX
3030      if (posix || do_posix)      if (posix || do_posix)
3031        {        {
3032        int rc;        int rc;
3033        int eflags = 0;        int eflags = 0;
3034        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
3035          if (use_size_offsets > 0)
3036            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3037        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3038        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3039          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3040    
3041        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
3042    
3043        if (rc != 0)        if (rc != 0)
3044          {          {
3045          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3046          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3047          }          }
3048          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3049                  != 0)
3050            {
3051            fprintf(outfile, "Matched with REG_NOSUB\n");
3052            }
3053        else        else
3054          {          {
3055          size_t i;          size_t i;
3056          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
3057            {            {
3058            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3059              {              {
3060              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3061              pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer + pmatch[i].rm_so,
3062                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3063              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3064                if (do_showcaprest || (i == 0 && do_showrest))
3065                  {
3066                  fprintf(outfile, "%2d+ ", (int)i);
3067                  PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3068                    outfile);
3069                  fprintf(outfile, "\n");
3070                  }
3071              }              }
3072            }            }
3073          }          }
3074          free(pmatch);
3075          goto NEXT_DATA;
3076        }        }
3077    
3078      /* Handle matching via the native interface */  #endif  /* !defined NOPOSIX */
3079    
3080      else      /* Handle matching via the native interface - repeats for /g and /G */
3081    
3082    #ifdef SUPPORT_PCRE16
3083        if (use_pcre16)
3084        {        {
3085        if (timeit)        len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3086          if (len < 0)
3087            {
3088            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3089              "converted to UTF-16\n");
3090            goto NEXT_DATA;
3091            }
3092          bptr = (pcre_uint8 *)buffer16;
3093          }
3094    #endif
3095    
3096        for (;; gmatched++)    /* Loop for /g or /G */
3097          {
3098          markptr = NULL;
3099    
3100          if (timeitm > 0)
3101          {          {
3102          register int i;          register int i;
3103          clock_t time_taken;          clock_t time_taken;
3104          clock_t start_time = clock();          clock_t start_time = clock();
3105          for (i = 0; i < LOOPREPEAT; i++)  
3106            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
3107              size_offsets);          if (all_use_dfa || use_dfa)
3108              {
3109              int workspace[1000];
3110              for (i = 0; i < timeitm; i++)
3111                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3112                  options | g_notempty, use_offsets, use_size_offsets, workspace,
3113                  sizeof(workspace)/sizeof(int));
3114              }
3115            else
3116    #endif
3117    
3118            for (i = 0; i < timeitm; i++)
3119              {
3120              PCRE_EXEC(count, re, extra, bptr, len,
3121                start_offset, options | g_notempty, use_offsets, use_size_offsets);
3122              }
3123          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3124          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3125            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
3126            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
3127            }
3128    
3129          /* If find_match_limit is set, we want to do repeated matches with
3130          varying limits in order to find the minimum value for the match limit and
3131          for the recursion limit. The match limits are relevant only to the normal
3132          running of pcre_exec(), so disable the JIT optimization. This makes it
3133          possible to run the same set of tests with and without JIT externally
3134          requested. */
3135    
3136          if (find_match_limit)
3137            {
3138            if (extra == NULL)
3139              {
3140              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3141              extra->flags = 0;
3142              }
3143            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3144    
3145            (void)check_match_limit(re, extra, bptr, len, start_offset,
3146              options|g_notempty, use_offsets, use_size_offsets,
3147              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3148              PCRE_ERROR_MATCHLIMIT, "match()");
3149    
3150            count = check_match_limit(re, extra, bptr, len, start_offset,
3151              options|g_notempty, use_offsets, use_size_offsets,
3152              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3153              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3154            }
3155    
3156          /* If callout_data is set, use the interface with additional data */
3157    
3158          else if (callout_data_set)
3159            {
3160            if (extra == NULL)
3161              {
3162              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3163              extra->flags = 0;
3164              }
3165            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3166            extra->callout_data = &callout_data;
3167            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3168              options | g_notempty, use_offsets, use_size_offsets);
3169            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3170          }          }
3171    
3172        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
3173          size_offsets);        value of match_limit. */
3174    
3175    #if !defined NODFA
3176          else if (all_use_dfa || use_dfa)
3177            {
3178            int workspace[1000];
3179            count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3180              options | g_notempty, use_offsets, use_size_offsets, workspace,
3181              sizeof(workspace)/sizeof(int));
3182            if (count == 0)
3183              {
3184              fprintf(outfile, "Matched, but too many subsidiary matches\n");
3185              count = use_size_offsets/2;
3186              }
3187            }
3188    #endif
3189    
3190        if (count == 0)        else
3191          {          {
3192          fprintf(outfile, "Matched, but too many substrings\n");          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3193          count = size_offsets/3;            options | g_notempty, use_offsets, use_size_offsets);
3194            if (count == 0)
3195              {
3196              fprintf(outfile, "Matched, but too many substrings\n");
3197              count = use_size_offsets/3;
3198              }
3199          }          }
3200    
3201          /* Matched */
3202    
3203        if (count >= 0)        if (count >= 0)
3204          {          {
3205          int i;          int i, maxcount;
3206          count *= 2;  
3207          for (i = 0; i < count; i += 2)  #if !defined NODFA
3208            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3209    #endif
3210              maxcount = use_size_offsets/3;
3211    
3212            /* This is a check against a lunatic return value. */
3213    
3214            if (count > maxcount)
3215              {
3216              fprintf(outfile,
3217                "** PCRE error: returned count %d is too big for offset size %d\n",
3218                count, use_size_offsets);
3219              count = use_size_offsets/3;
3220              if (do_g || do_G)
3221                {
3222                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3223                do_g = do_G = FALSE;        /* Break g/G loop */
3224                }
3225              }
3226    
3227            /* do_allcaps requests showing of all captures in the pattern, to check
3228            unset ones at the end. */
3229    
3230            if (do_allcaps)
3231              {
3232              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3233              count++;   /* Allow for full match */
3234              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3235              }
3236    
3237            /* Output the captured substrings */
3238    
3239            for (i = 0; i < count * 2; i += 2)
3240            {            {
3241            if (offsets[i] < 0)            if (use_offsets[i] < 0)
3242                {
3243                if (use_offsets[i] != -1)
3244                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3245                    use_offsets[i], i);
3246                if (use_offsets[i+1] != -1)
3247                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3248                    use_offsets[i+1], i+1);
3249              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3250                }
3251            else            else
3252              {              {
3253              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3254              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              PCHARSV(bptr + use_offsets[i],
3255                  use_offsets[i+1] - use_offsets[i], outfile);
3256                fprintf(outfile, "\n");
3257                if (do_showcaprest || (i == 0 && do_showrest))
3258                  {
3259                  fprintf(outfile, "%2d+ ", i/2);
3260                  PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3261                    outfile);
3262                  fprintf(outfile, "\n");
3263                  }
3264                }
3265              }
3266    
3267            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3268    
3269            for (i = 0; i < 32; i++)
3270              {
3271              if ((copystrings & (1 << i)) != 0)
3272                {
3273                char copybuffer[256];
3274                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
3275                  i, copybuffer, sizeof(copybuffer));
3276                if (rc < 0)
3277                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3278                else
3279                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
3280                }
3281              }
3282    
3283            for (copynamesptr = copynames;
3284                 *copynamesptr != 0;
3285                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
3286              {
3287              char copybuffer[256];
3288              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
3289                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
3290              if (rc < 0)
3291                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
3292              else
3293                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
3294              }
3295    
3296            for (i = 0; i < 32; i++)
3297              {
3298              if ((getstrings & (1 << i)) != 0)
3299                {
3300                const char *substring;
3301                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
3302                  i, &substring);
3303                if (rc < 0)
3304                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
3305                else
3306                  {
3307                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
3308                  pcre_free_substring(substring);
3309                  }
3310                }
3311              }
3312    
3313            for (getnamesptr = getnames;
3314                 *getnamesptr != 0;
3315                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
3316              {
3317              const char *substring;
3318              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,