/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcretest.c revision 73 by nigel, Sat Feb 24 21:40:30 2007 UTC code/branches/pcre16/pcretest.c revision 813 by ph10, Tue Dec 20 14:03:16 2011 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 47  been extended and consequently is now ra
47  #include <stdlib.h>  #include <stdlib.h>
48  #include <time.h>  #include <time.h>
49  #include <locale.h>  #include <locale.h>
50    #include <errno.h>
51    
52    #ifdef SUPPORT_LIBREADLINE
53    #ifdef HAVE_UNISTD_H
54    #include <unistd.h>
55    #endif
56    #include <readline/readline.h>
57    #include <readline/history.h>
58    #endif
59    
60    
61    /* A number of things vary for Windows builds. Originally, pcretest opened its
62    input and output without "b"; then I was told that "b" was needed in some
63    environments, so it was added for release 5.0 to both the input and output. (It
64    makes no difference on Unix-like systems.) Later I was told that it is wrong
65    for the input on Windows. I've now abstracted the modes into two macros that
66    are set here, to make it easier to fiddle with them, and removed "b" from the
67    input mode under Windows. */
68    
69    #if defined(_WIN32) || defined(WIN32)
70    #include <io.h>                /* For _setmode() */
71    #include <fcntl.h>             /* For _O_BINARY */
72    #define INPUT_MODE   "r"
73    #define OUTPUT_MODE  "wb"
74    
75    #ifndef isatty
76    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
77    #endif                         /* though in some environments they seem to   */
78                                   /* be already defined, hence the #ifndefs.    */
79    #ifndef fileno
80    #define fileno _fileno
81    #endif
82    
83    /* A user sent this fix for Borland Builder 5 under Windows. */
84    
85    #ifdef __BORLANDC__
86    #define _setmode(handle, mode) setmode(handle, mode)
87    #endif
88    
89  /* We need the internal info for displaying the results of pcre_study(). Also  /* Not Windows */
 for getting the opcodes for showing compiled code. */  
90    
91  #define PCRE_SPY        /* For Win32 build, import data, not export */  #else
92  #include "internal.h"  #include <sys/time.h>          /* These two includes are needed */
93    #include <sys/resource.h>      /* for setrlimit(). */
94    #define INPUT_MODE   "rb"
95    #define OUTPUT_MODE  "wb"
96    #endif
97    
98    
99    /* We have to include pcre_internal.h because we need the internal info for
100    displaying the results of pcre_study() and we also need to know about the
101    internal macros, structures, and other internal data values; pcretest has
102    "inside information" compared to a program that strictly follows the PCRE API.
103    
104    Although pcre_internal.h does itself include pcre.h, we explicitly include it
105    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106    appropriately for an application, not for building PCRE. */
107    
108    #include "pcre.h"
109    #include "pcre_internal.h"
110    
111    /* The pcre_printint() function, which prints the internal form of a compiled
112    regex, is held in a separate file so that (a) it can be compiled in either
113    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114    when that is compiled in debug mode. */
115    
116    #ifdef SUPPORT_PCRE8
117    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118    #endif
119    #ifdef SUPPORT_PCRE16
120    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121    #endif
122    
123    /* We need access to some of the data tables that PCRE uses. So as not to have
124    to keep two copies, we include the source file here, changing the names of the
125    external symbols to prevent clashes. */
126    
127    #define _pcre_ucp_gentype      ucp_gentype
128    #define _pcre_ucp_typerange    ucp_typerange
129    #define _pcre_utf8_table1      utf8_table1
130    #define _pcre_utf8_table1_size utf8_table1_size
131    #define _pcre_utf8_table2      utf8_table2
132    #define _pcre_utf8_table3      utf8_table3
133    #define _pcre_utf8_table4      utf8_table4
134    #define _pcre_utt              utt
135    #define _pcre_utt_size         utt_size
136    #define _pcre_utt_names        utt_names
137    #define _pcre_OP_lengths       OP_lengths
138    
139    #include "pcre_tables.c"
140    
141    /* The definition of the macro PRINTABLE, which determines whether to print an
142    output character as-is or as a hex value when showing compiled patterns, is
143    the same as in the printint.src file. We use it here in cases when the locale
144    has not been explicitly changed, so as to get consistent output from systems
145    that differ in their output from isprint() even in the "C" locale. */
146    
147    #ifdef EBCDIC
148    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149    #else
150    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151    #endif
152    
153    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154    
155  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
156  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 160  Makefile. */
160  #include "pcreposix.h"  #include "pcreposix.h"
161  #endif  #endif
162    
163    /* It is also possible, originally for the benefit of a version that was
164    imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165    without the interface to the DFA matcher (NODFA), and without the doublecheck
166    of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167    out the UTF8 support if PCRE is built without it. */
168    
169    #ifndef SUPPORT_UTF8
170    #ifndef NOUTF8
171    #define NOUTF8
172    #endif
173    #endif
174    
175    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177    only from one place and is handled differently). I couldn't dream up any way of
178    using a single macro to do this in a generic way, because of the many different
179    argument requirements. We know that at least one of SUPPORT_PCRE8 and
180    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181    use these in the definitions of generic macros. */
182    
183    #ifdef SUPPORT_PCRE8
184    
185    #define PCHARS8(lv, p, len, f) \
186      lv = pchars((pcre_uint8 *)p, len, f)
187    
188    #define PCHARSV8(p, len, f) \
189      (void)pchars((pcre_uint8 *)p, len, f)
190    
191    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
192      re = pcre_compile((char *)pat, options, error, erroffset, tables)
193    
194    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
195        offsets, size_offsets) \
196      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
197        offsets, size_offsets)
198    
199    #define PCRE_FREE_STUDY8(extra) \
200      pcre_free_study(extra)
201    
202    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
203      pcre_pattern_to_host_byte_order(re, extra, tables)
204    
205    #define PCRE_STUDY8(extra, re, options, error) \
206      extra = pcre_study(re, options, error)
207    
208    #endif /* SUPPORT_PCRE8 */
209    
210    
211    #ifdef SUPPORT_PCRE16
212    
213    #define PCHARS16(lv, p, len, f) \
214      lv = pchars16((PCRE_SPTR16)p, len, f)
215    
216    #define PCHARSV16(p, len, f) \
217      (void)pchars16((PCRE_SPTR16)p, len, f)
218    
219    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
220      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
221    
222    #define PCRE_FREE_STUDY16(extra) \
223      pcre16_free_study(extra)
224    
225    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
226        offsets, size_offsets) \
227      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
228        options, offsets, size_offsets)
229    
230    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
231      pcre16_pattern_to_host_byte_order(re, extra, tables)
232    
233    #define PCRE_STUDY16(extra, re, options, error) \
234      extra = pcre16_study(re, options, error)
235    
236    #endif /* SUPPORT_PCRE16 */
237    
238    
239    /* ----- Both modes are supported; a runtime test is needed ----- */
240    
241    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
242    
243    #define PCHARS(lv, p, len, f) \
244      if (use_pcre16) \
245        PCHARS16(lv, p, len, f); \
246      else \
247        PCHARS8(lv, p, len, f)
248    
249    #define PCHARSV(p, len, f) \
250      if (use_pcre16) \
251        PCHARSV16(p, len, f); \
252      else \
253        PCHARSV8(p, len, f)
254    
255    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
256      if (use_pcre16) \
257        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
258      else \
259        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
260    
261    #define PCRE_FREE_STUDY(extra) \
262      if (use_pcre16) \
263        PCRE_FREE_STUDY16(extra); \
264      else \
265        PCRE_FREE_STUDY8(extra)
266    
267    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
268        offsets, size_offsets) \
269      if (use_pcre16) \
270        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
271          offsets, size_offsets); \
272      else \
273        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
274          offsets, size_offsets)
275    
276    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
277      if (use_pcre16) \
278        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
279      else \
280        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
281    
282    #define PCRE_STUDY(extra, re, options, error) \
283      if (use_pcre16) \
284        PCRE_STUDY16(extra, re, options, error); \
285      else \
286        PCRE_STUDY8(extra, re, options, error)
287    
288    /* ----- Only 8-bit mode is supported ----- */
289    
290    #elif defined SUPPORT_PCRE8
291    #define PCHARS           PCHARS8
292    #define PCHARSV          PCHARSV8
293    #define PCRE_COMPILE     PCRE_COMPILE8
294    #define PCRE_EXEC        PCRE_EXEC8
295    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY8
296    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
297    #define PCRE_STUDY       PCRE_STUDY8
298    
299    /* ----- Only 16-bit mode is supported ----- */
300    
301    #else
302    #define PCHARS           PCHARS16
303    #define PCHARSV          PCHARSV16
304    #define PCRE_COMPILE     PCRE_COMPILE16
305    #define PCRE_EXEC        PCRE_EXEC16
306    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY16
307    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
308    #define PCRE_STUDY       PCRE_STUDY16
309    #endif
310    
311    /* ----- End of mode-specific function call macros ----- */
312    
313    
314    /* Other parameters */
315    
316  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
317  #ifdef CLK_TCK  #ifdef CLK_TCK
318  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 321  Makefile. */
321  #endif  #endif
322  #endif  #endif
323    
324  #define LOOPREPEAT 50000  /* This is the default loop count for timing. */
325    
326  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define DBUFFER_SIZE BUFFER_SIZE  
327    
328    /* Static variables */
329    
330  static FILE *outfile;  static FILE *outfile;
331  static int log_store = 0;  static int log_store = 0;
# Line 47  static int callout_count; Line 333  static int callout_count;
333  static int callout_extra;  static int callout_extra;
334  static int callout_fail_count;  static int callout_fail_count;
335  static int callout_fail_id;  static int callout_fail_id;
336    static int debug_lengths;
337  static int first_callout;  static int first_callout;
338    static int locale_set = 0;
339  static int show_malloc;  static int show_malloc;
340  static int use_utf8;  static int use_utf;
341  static size_t gotten_store;  static size_t gotten_store;
342    static size_t first_gotten_store = 0;
343    static const unsigned char *last_callout_mark = NULL;
344    
345    /* The buffers grow automatically if very long input lines are encountered. */
346    
347  static const int utf8_table1[] = {  static int buffer_size = 50000;
348    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  static pcre_uint8 *buffer = NULL;
349    static pcre_uint8 *dbuffer = NULL;
350    static pcre_uint8 *pbuffer = NULL;
351    
352    /* Another buffer is needed for translation to 16-bit character strings. It will
353    be obtained and extended as required. */
354    
355    #ifdef SUPPORT_PCRE16
356    static int buffer16_size = 0;
357    static pcre_uint16 *buffer16 = NULL;
358    
359    /* We need the table of operator lengths that is used for 16-bit compiling, in
360    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
361    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
362    appropriately for the 16-bit world. Just as a safety check, make sure that
363    COMPILE_PCRE16 is *not* set. */
364    
365  static const int utf8_table2[] = {  #ifdef COMPILE_PCRE16
366    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  #error COMPILE_PCRE16 must not be set when compiling pcretest.c
367    #endif
368    
369  static const int utf8_table3[] = {  #if LINK_SIZE == 2
370    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  #undef LINK_SIZE
371    #define LINK_SIZE 1
372    #elif LINK_SIZE == 3 || LINK_SIZE == 4
373    #undef LINK_SIZE
374    #define LINK_SIZE 2
375    #else
376    #error LINK_SIZE must be either 2, 3, or 4
377    #endif
378    
379    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
380    
381    #endif  /* SUPPORT_PCRE16 */
382    
383    /* If we have 8-bit support, default use_pcre16 to false; if there is also
384    16-bit support, it can be changed by an option. If there is no 8-bit support,
385    there must be 16-bit support, so default it to 1. */
386    
387    #ifdef SUPPORT_PCRE8
388    static int use_pcre16 = 0;
389    #else
390    static int use_pcre16 = 1;
391    #endif
392    
393    /* Textual explanations for runtime error codes */
394    
395    static const char *errtexts[] = {
396      NULL,  /* 0 is no error */
397      NULL,  /* NOMATCH is handled specially */
398      "NULL argument passed",
399      "bad option value",
400      "magic number missing",
401      "unknown opcode - pattern overwritten?",
402      "no more memory",
403      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
404      "match limit exceeded",
405      "callout error code",
406      NULL,  /* BADUTF8 is handled specially */
407      "bad UTF-8 offset",
408      NULL,  /* PARTIAL is handled specially */
409      "not used - internal error",
410      "internal error - pattern overwritten?",
411      "bad count value",
412      "item unsupported for DFA matching",
413      "backreference condition or recursion test not supported for DFA matching",
414      "match limit not supported for DFA matching",
415      "workspace size exceeded in DFA matching",
416      "too much recursion for DFA matching",
417      "recursion limit exceeded",
418      "not used - internal error",
419      "invalid combination of newline options",
420      "bad offset value",
421      NULL,  /* SHORTUTF8 is handled specially */
422      "nested recursion at the same subject position",
423      "JIT stack limit reached",
424      "pattern compiled in wrong mode (8-bit/16-bit error)"
425    };
426    
427    
428  /*************************************************  /*************************************************
429  *         Print compiled regex                   *  *         Alternate character tables             *
430  *************************************************/  *************************************************/
431    
432  /* The code for doing this is held in a separate file that is also included in  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
433  pcre.c when it is compiled with the debug switch. It defines a function called  using the default tables of the library. However, the T option can be used to
434  print_internals(), which uses a table of opcode lengths defined by the macro  select alternate sets of tables, for different kinds of testing. Note also that
435  OP_LENGTHS, whose name must be OP_lengths. */  the L (locale) option also adjusts the tables. */
436    
437    /* This is the set of tables distributed as default with PCRE. It recognizes
438    only ASCII characters. */
439    
440    static const pcre_uint8 tables0[] = {
441    
442    /* This table is a lower casing table. */
443    
444        0,  1,  2,  3,  4,  5,  6,  7,
445        8,  9, 10, 11, 12, 13, 14, 15,
446       16, 17, 18, 19, 20, 21, 22, 23,
447       24, 25, 26, 27, 28, 29, 30, 31,
448       32, 33, 34, 35, 36, 37, 38, 39,
449       40, 41, 42, 43, 44, 45, 46, 47,
450       48, 49, 50, 51, 52, 53, 54, 55,
451       56, 57, 58, 59, 60, 61, 62, 63,
452       64, 97, 98, 99,100,101,102,103,
453      104,105,106,107,108,109,110,111,
454      112,113,114,115,116,117,118,119,
455      120,121,122, 91, 92, 93, 94, 95,
456       96, 97, 98, 99,100,101,102,103,
457      104,105,106,107,108,109,110,111,
458      112,113,114,115,116,117,118,119,
459      120,121,122,123,124,125,126,127,
460      128,129,130,131,132,133,134,135,
461      136,137,138,139,140,141,142,143,
462      144,145,146,147,148,149,150,151,
463      152,153,154,155,156,157,158,159,
464      160,161,162,163,164,165,166,167,
465      168,169,170,171,172,173,174,175,
466      176,177,178,179,180,181,182,183,
467      184,185,186,187,188,189,190,191,
468      192,193,194,195,196,197,198,199,
469      200,201,202,203,204,205,206,207,
470      208,209,210,211,212,213,214,215,
471      216,217,218,219,220,221,222,223,
472      224,225,226,227,228,229,230,231,
473      232,233,234,235,236,237,238,239,
474      240,241,242,243,244,245,246,247,
475      248,249,250,251,252,253,254,255,
476    
477    /* This table is a case flipping table. */
478    
479        0,  1,  2,  3,  4,  5,  6,  7,
480        8,  9, 10, 11, 12, 13, 14, 15,
481       16, 17, 18, 19, 20, 21, 22, 23,
482       24, 25, 26, 27, 28, 29, 30, 31,
483       32, 33, 34, 35, 36, 37, 38, 39,
484       40, 41, 42, 43, 44, 45, 46, 47,
485       48, 49, 50, 51, 52, 53, 54, 55,
486       56, 57, 58, 59, 60, 61, 62, 63,
487       64, 97, 98, 99,100,101,102,103,
488      104,105,106,107,108,109,110,111,
489      112,113,114,115,116,117,118,119,
490      120,121,122, 91, 92, 93, 94, 95,
491       96, 65, 66, 67, 68, 69, 70, 71,
492       72, 73, 74, 75, 76, 77, 78, 79,
493       80, 81, 82, 83, 84, 85, 86, 87,
494       88, 89, 90,123,124,125,126,127,
495      128,129,130,131,132,133,134,135,
496      136,137,138,139,140,141,142,143,
497      144,145,146,147,148,149,150,151,
498      152,153,154,155,156,157,158,159,
499      160,161,162,163,164,165,166,167,
500      168,169,170,171,172,173,174,175,
501      176,177,178,179,180,181,182,183,
502      184,185,186,187,188,189,190,191,
503      192,193,194,195,196,197,198,199,
504      200,201,202,203,204,205,206,207,
505      208,209,210,211,212,213,214,215,
506      216,217,218,219,220,221,222,223,
507      224,225,226,227,228,229,230,231,
508      232,233,234,235,236,237,238,239,
509      240,241,242,243,244,245,246,247,
510      248,249,250,251,252,253,254,255,
511    
512    /* This table contains bit maps for various character classes. Each map is 32
513    bytes long and the bits run from the least significant end of each byte. The
514    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
515    graph, print, punct, and cntrl. Other classes are built from combinations. */
516    
517      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
518      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
520      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
521    
522      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
523      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
524      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
525      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
526    
527      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
528      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
529      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
530      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
531    
532      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
533      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
534      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
535      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
536    
537      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
538      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
539      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
540      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
541    
542      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
543      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
544      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
545      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
546    
547      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
548      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
549      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
550      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
551    
552      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
553      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
554      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
555      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
556    
557      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
558      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
559      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
560      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
561    
562      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
563      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
564      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
565      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
566    
567    /* This table identifies various classes of character by individual bits:
568      0x01   white space character
569      0x02   letter
570      0x04   decimal digit
571      0x08   hexadecimal digit
572      0x10   alphanumeric or '_'
573      0x80   regular expression metacharacter or binary zero
574    */
575    
576  static uschar OP_lengths[] = { OP_LENGTHS };    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
577      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
578      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
579      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
580      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
581      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
582      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
583      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
584      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
585      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
586      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
587      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
588      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
589      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
590      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
591      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
592      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
593      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
594      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
595      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
596      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
597      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
598      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
599      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
600      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
601      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
602      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
603      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
604      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
605      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
606      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
607      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
608    
609    /* This is a set of tables that came originally from a Windows user. It seems to
610    be at least an approximation of ISO 8859. In particular, there are characters
611    greater than 128 that are marked as spaces, letters, etc. */
612    
613    static const pcre_uint8 tables1[] = {
614    0,1,2,3,4,5,6,7,
615    8,9,10,11,12,13,14,15,
616    16,17,18,19,20,21,22,23,
617    24,25,26,27,28,29,30,31,
618    32,33,34,35,36,37,38,39,
619    40,41,42,43,44,45,46,47,
620    48,49,50,51,52,53,54,55,
621    56,57,58,59,60,61,62,63,
622    64,97,98,99,100,101,102,103,
623    104,105,106,107,108,109,110,111,
624    112,113,114,115,116,117,118,119,
625    120,121,122,91,92,93,94,95,
626    96,97,98,99,100,101,102,103,
627    104,105,106,107,108,109,110,111,
628    112,113,114,115,116,117,118,119,
629    120,121,122,123,124,125,126,127,
630    128,129,130,131,132,133,134,135,
631    136,137,138,139,140,141,142,143,
632    144,145,146,147,148,149,150,151,
633    152,153,154,155,156,157,158,159,
634    160,161,162,163,164,165,166,167,
635    168,169,170,171,172,173,174,175,
636    176,177,178,179,180,181,182,183,
637    184,185,186,187,188,189,190,191,
638    224,225,226,227,228,229,230,231,
639    232,233,234,235,236,237,238,239,
640    240,241,242,243,244,245,246,215,
641    248,249,250,251,252,253,254,223,
642    224,225,226,227,228,229,230,231,
643    232,233,234,235,236,237,238,239,
644    240,241,242,243,244,245,246,247,
645    248,249,250,251,252,253,254,255,
646    0,1,2,3,4,5,6,7,
647    8,9,10,11,12,13,14,15,
648    16,17,18,19,20,21,22,23,
649    24,25,26,27,28,29,30,31,
650    32,33,34,35,36,37,38,39,
651    40,41,42,43,44,45,46,47,
652    48,49,50,51,52,53,54,55,
653    56,57,58,59,60,61,62,63,
654    64,97,98,99,100,101,102,103,
655    104,105,106,107,108,109,110,111,
656    112,113,114,115,116,117,118,119,
657    120,121,122,91,92,93,94,95,
658    96,65,66,67,68,69,70,71,
659    72,73,74,75,76,77,78,79,
660    80,81,82,83,84,85,86,87,
661    88,89,90,123,124,125,126,127,
662    128,129,130,131,132,133,134,135,
663    136,137,138,139,140,141,142,143,
664    144,145,146,147,148,149,150,151,
665    152,153,154,155,156,157,158,159,
666    160,161,162,163,164,165,166,167,
667    168,169,170,171,172,173,174,175,
668    176,177,178,179,180,181,182,183,
669    184,185,186,187,188,189,190,191,
670    224,225,226,227,228,229,230,231,
671    232,233,234,235,236,237,238,239,
672    240,241,242,243,244,245,246,215,
673    248,249,250,251,252,253,254,223,
674    192,193,194,195,196,197,198,199,
675    200,201,202,203,204,205,206,207,
676    208,209,210,211,212,213,214,247,
677    216,217,218,219,220,221,222,255,
678    0,62,0,0,1,0,0,0,
679    0,0,0,0,0,0,0,0,
680    32,0,0,0,1,0,0,0,
681    0,0,0,0,0,0,0,0,
682    0,0,0,0,0,0,255,3,
683    126,0,0,0,126,0,0,0,
684    0,0,0,0,0,0,0,0,
685    0,0,0,0,0,0,0,0,
686    0,0,0,0,0,0,255,3,
687    0,0,0,0,0,0,0,0,
688    0,0,0,0,0,0,12,2,
689    0,0,0,0,0,0,0,0,
690    0,0,0,0,0,0,0,0,
691    254,255,255,7,0,0,0,0,
692    0,0,0,0,0,0,0,0,
693    255,255,127,127,0,0,0,0,
694    0,0,0,0,0,0,0,0,
695    0,0,0,0,254,255,255,7,
696    0,0,0,0,0,4,32,4,
697    0,0,0,128,255,255,127,255,
698    0,0,0,0,0,0,255,3,
699    254,255,255,135,254,255,255,7,
700    0,0,0,0,0,4,44,6,
701    255,255,127,255,255,255,127,255,
702    0,0,0,0,254,255,255,255,
703    255,255,255,255,255,255,255,127,
704    0,0,0,0,254,255,255,255,
705    255,255,255,255,255,255,255,255,
706    0,2,0,0,255,255,255,255,
707    255,255,255,255,255,255,255,127,
708    0,0,0,0,255,255,255,255,
709    255,255,255,255,255,255,255,255,
710    0,0,0,0,254,255,0,252,
711    1,0,0,248,1,0,0,120,
712    0,0,0,0,254,255,255,255,
713    0,0,128,0,0,0,128,0,
714    255,255,255,255,0,0,0,0,
715    0,0,0,0,0,0,0,128,
716    255,255,255,255,0,0,0,0,
717    0,0,0,0,0,0,0,0,
718    128,0,0,0,0,0,0,0,
719    0,1,1,0,1,1,0,0,
720    0,0,0,0,0,0,0,0,
721    0,0,0,0,0,0,0,0,
722    1,0,0,0,128,0,0,0,
723    128,128,128,128,0,0,128,0,
724    28,28,28,28,28,28,28,28,
725    28,28,0,0,0,0,0,128,
726    0,26,26,26,26,26,26,18,
727    18,18,18,18,18,18,18,18,
728    18,18,18,18,18,18,18,18,
729    18,18,18,128,128,0,128,16,
730    0,26,26,26,26,26,26,18,
731    18,18,18,18,18,18,18,18,
732    18,18,18,18,18,18,18,18,
733    18,18,18,128,128,0,0,0,
734    0,0,0,0,0,1,0,0,
735    0,0,0,0,0,0,0,0,
736    0,0,0,0,0,0,0,0,
737    0,0,0,0,0,0,0,0,
738    1,0,0,0,0,0,0,0,
739    0,0,18,0,0,0,0,0,
740    0,0,20,20,0,18,0,0,
741    0,20,18,0,0,0,0,0,
742    18,18,18,18,18,18,18,18,
743    18,18,18,18,18,18,18,18,
744    18,18,18,18,18,18,18,0,
745    18,18,18,18,18,18,18,18,
746    18,18,18,18,18,18,18,18,
747    18,18,18,18,18,18,18,18,
748    18,18,18,18,18,18,18,0,
749    18,18,18,18,18,18,18,18
750    };
751    
 #include "printint.c"  
752    
753    
754    
755    #ifndef HAVE_STRERROR
756  /*************************************************  /*************************************************
757  *          Read number from string               *  *     Provide strerror() for non-ANSI libraries  *
758  *************************************************/  *************************************************/
759    
760  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
761  around with conditional compilation, just do the job by hand. It is only used  in their libraries, but can provide the same facility by this simple
762  for unpicking the -o argument, so just keep it simple.  alternative function. */
   
 Arguments:  
   str           string to be converted  
   endptr        where to put the end pointer  
763    
764  Returns:        the unsigned long  extern int   sys_nerr;
765  */  extern char *sys_errlist[];
766    
767  static int  char *
768  get_value(unsigned char *str, unsigned char **endptr)  strerror(int n)
769  {  {
770  int result = 0;  if (n < 0 || n >= sys_nerr) return "unknown error number";
771  while(*str != 0 && isspace(*str)) str++;  return sys_errlist[n];
 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');  
 *endptr = str;  
 return(result);  
772  }  }
773    #endif /* HAVE_STRERROR */
774    
775    
776  /*************************************************  /*************************************************
777  *       Convert character value to UTF-8         *  *         JIT memory callback                    *
778  *************************************************/  *************************************************/
779    
780  /* This function takes an integer value in the range 0 - 0x7fffffff  static pcre_jit_stack* jit_callback(void *arg)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
781  {  {
782  register int i, j;  return (pcre_jit_stack *)arg;
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
783  }  }
784    
785    
# Line 151  return i + 1; Line 791  return i + 1;
791  and returns the value of the character.  and returns the value of the character.
792    
793  Argument:  Argument:
794    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
795    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
796    
797  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
798             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
799  */  */
800    
801    #if !defined NOUTF8
802    
803  static int  static int
804  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
805  {  {
806  int c = *buffer++;  int c = *utf8bytes++;
807  int d = c;  int d = c;
808  int i, j, s;  int i, j, s;
809    
# Line 181  d = (c & utf8_table3[i]) << s; Line 823  d = (c & utf8_table3[i]) << s;
823    
824  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
825    {    {
826    c = *buffer++;    c = *utf8bytes++;
827    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
828    s -= 6;    s -= 6;
829    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 189  for (j = 0; j < i; j++) Line 831  for (j = 0; j < i; j++)
831    
832  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
833    
834  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
835    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
836  if (j != i) return -(i+1);  if (j != i) return -(i+1);
837    
# Line 199  if (j != i) return -(i+1); Line 841  if (j != i) return -(i+1);
841  return i+1;  return i+1;
842  }  }
843    
844    #endif
845    
846    
847    
848    /*************************************************
849    *       Convert character value to UTF-8         *
850    *************************************************/
851    
852    /* This function takes an integer value in the range 0 - 0x7fffffff
853    and encodes it as a UTF-8 character in 0 to 6 bytes.
854    
855    Arguments:
856      cvalue     the character value
857      utf8bytes  pointer to buffer for result - at least 6 bytes long
858    
859    Returns:     number of characters placed in the buffer
860    */
861    
862    #if !defined NOUTF8
863    
864    static int
865    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
866    {
867    register int i, j;
868    for (i = 0; i < utf8_table1_size; i++)
869      if (cvalue <= utf8_table1[i]) break;
870    utf8bytes += i;
871    for (j = i; j > 0; j--)
872     {
873     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
874     cvalue >>= 6;
875     }
876    *utf8bytes = utf8_table2[i] | cvalue;
877    return i + 1;
878    }
879    
880    #endif
881    
882    
883    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* In non-UTF mode, the space needed for a 16-bit string is exactly double the
8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
result is always left in buffer16, which is (re)allocated here when it is too
small; buffer16_size is a byte count.

A decoded value greater than 0x10ffff cannot be expressed as a UTF-16
surrogate pair, and a character whose bytes extend beyond the stated length
is incomplete; both are reported in the same way as malformed UTF-8.

Arguments:
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed, runs past len, or holds
                a value that UTF-16 cannot represent
*/

static int
to16(pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *pp;

/* Room is needed for len 16-bit items plus the terminating zero item. */

if (buffer16_size < 2*len + 2)
  {
  free(buffer16);
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

pp = buffer16;

if (!utf)
  {
  /* Plain widening of each byte to a 16-bit item. */
  while (len-- > 0) *pp++ = *p++;
  }

else
  {
  int c;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);

    /* Reject malformed input and a final character that would take bytes
    from beyond the end of the string. */

    if (chlen <= 0 || chlen > len) return -1;

    /* Values above 0x10ffff would overflow the 10-bit high surrogate field
    and produce a corrupt 16-bit item. */

    if (c > 0x10ffff) return -1;

    p += chlen;
    len -= chlen;
    if (c < 0x10000) *pp++ = c; else
      {
      c -= 0x10000;
      *pp++ = 0xD800 | (c >> 10);
      *pp++ = 0xDC00 | (c & 0x3ff);
      }
    }
  }

*pp = 0;
return (int)(pp - buffer16);
}
#endif
950    
951    
952    /*************************************************
953    *        Read or extend an input line            *
954    *************************************************/
955    
956    /* Input lines are read into buffer, but both patterns and data lines can be
957    continued over multiple input lines. In addition, if the buffer fills up, we
958    want to automatically expand it so as to be able to handle extremely large
959    lines that are needed for certain stress tests. When the input buffer is
960    expanded, the other two buffers must also be expanded likewise, and the
961    contents of pbuffer, which are a copy of the input for callouts, must be
962    preserved (for when expansion happens for a data line). This is not the most
963    optimal way of handling this, but hey, this is just a test program!
964    
965    Arguments:
966      f            the file to read
967      start        where in buffer to start (this *must* be within buffer)
968      prompt       for stdin or readline()
969    
970    Returns:       pointer to the start of new data
971                   could be a copy of start, or could be moved
972                   NULL if no data read and EOF reached
973    */
974    
975    static pcre_uint8 *
976    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
977    {
978    pcre_uint8 *here = start;
979    
980    for (;;)
981      {
982      int rlen = (int)(buffer_size - (here - buffer));
983    
984      if (rlen > 1000)
985        {
986        int dlen;
987    
988        /* If libreadline support is required, use readline() to read a line if the
989        input is a terminal. Note that readline() removes the trailing newline, so
990        we must put it back again, to be compatible with fgets(). */
991    
992    #ifdef SUPPORT_LIBREADLINE
993        if (isatty(fileno(f)))
994          {
995          size_t len;
996          char *s = readline(prompt);
997          if (s == NULL) return (here == start)? NULL : start;
998          len = strlen(s);
999          if (len > 0) add_history(s);
1000          if (len > rlen - 1) len = rlen - 1;
1001          memcpy(here, s, len);
1002          here[len] = '\n';
1003          here[len+1] = 0;
1004          free(s);
1005          }
1006        else
1007    #endif
1008    
1009        /* Read the next line by normal means, prompting if the file is stdin. */
1010    
1011          {
1012          if (f == stdin) printf("%s", prompt);
1013          if (fgets((char *)here, rlen,  f) == NULL)
1014            return (here == start)? NULL : start;
1015          }
1016    
1017        dlen = (int)strlen((char *)here);
1018        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1019        here += dlen;
1020        }
1021    
1022      else
1023        {
1024        int new_buffer_size = 2*buffer_size;
1025        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1026        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1027        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1028    
1029        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1030          {
1031          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1032          exit(1);
1033          }
1034    
1035        memcpy(new_buffer, buffer, buffer_size);
1036        memcpy(new_pbuffer, pbuffer, buffer_size);
1037    
1038        buffer_size = new_buffer_size;
1039    
1040        start = new_buffer + (start - buffer);
1041        here = new_buffer + (here - buffer);
1042    
1043        free(buffer);
1044        free(dbuffer);
1045        free(pbuffer);
1046    
1047        buffer = new_buffer;
1048        dbuffer = new_dbuffer;
1049        pbuffer = new_pbuffer;
1050        }
1051      }
1052    
1053    return NULL;  /* Control never gets here */
1054    }
1055    
1056    
1057    
1058    /*************************************************
1059    *          Read number from string               *
1060    *************************************************/
1061    
1062    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1063    around with conditional compilation, just do the job by hand. It is only used
1064    for unpicking arguments, so just keep it simple.
1065    
1066    Arguments:
1067      str           string to be converted
1068      endptr        where to put the end pointer
1069    
1070    Returns:        the unsigned long
1071    */
1072    
1073    static int
1074    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1075    {
1076    int result = 0;
1077    while(*str != 0 && isspace(*str)) str++;
1078    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1079    *endptr = str;
1080    return(result);
1081    }
1082    
1083    
1084    
1085  /*************************************************  /*************************************************
1086  *             Print character string             *  *             Print one character                *
1087  *************************************************/  *************************************************/
1088    
1089  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Print a single character either literally, or as a hex escape. */
 mode. Yields number of characters printed. If handed a NULL file, just counts  
 chars without printing. */  
1090    
1091  static int pchars(unsigned char *p, int length, FILE *f)  static int pchar(int c, FILE *f)
1092  {  {
1093  int c;  if (PRINTOK(c))
1094      {
1095      if (f != NULL) fprintf(f, "%c", c);
1096      return 1;
1097      }
1098    
1099    if (c < 0x100)
1100      {
1101      if (use_utf)
1102        {
1103        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1104        return 6;
1105        }
1106      else
1107        {
1108        if (f != NULL) fprintf(f, "\\x%02x", c);
1109        return 4;
1110        }
1111      }
1112    
1113    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1114    return (c <= 0x000000ff)? 6 :
1115           (c <= 0x00000fff)? 7 :
1116           (c <= 0x0000ffff)? 8 :
1117           (c <= 0x000fffff)? 9 : 10;
1118    }
1119    
1120    
1121    
1122    #ifdef SUPPORT_PCRE8
1123    /*************************************************
1124    *         Print 8-bit character string           *
1125    *************************************************/
1126    
1127    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1128    If handed a NULL file, just counts chars without printing. */
1129    
1130    static int pchars(pcre_uint8 *p, int length, FILE *f)
1131    {
1132    int c = 0;
1133  int yield = 0;  int yield = 0;
1134    
1135  while (length-- > 0)  while (length-- > 0)
1136    {    {
1137    if (use_utf8)  #if !defined NOUTF8
1138      if (use_utf)
1139      {      {
1140      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1141      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1142        {        {
1143        length -= rc - 1;        length -= rc - 1;
1144        p += rc;        p += rc;
1145        if (c < 256 && isprint(c))        yield += pchar(c, f);
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n;  
         if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);  
         yield += n;  
         }  
1146        continue;        continue;
1147        }        }
1148      }      }
1149    #endif
1150      c = *p++;
1151      yield += pchar(c, f);
1152      }
1153    
1154     /* Not UTF-8, or malformed UTF-8  */  return yield;
1155    }
1156    #endif
1157    
1158    if (isprint(c = *(p++)))  
1159      {  
1160      if (f != NULL) fprintf(f, "%c", c);  #ifdef SUPPORT_PCRE16
1161      yield++;  /*************************************************
1162      }  *           Print 16-bit character string        *
1163    else  *************************************************/
1164    
1165    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1166    If handed a NULL file, just counts chars without printing. */
1167    
1168    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1169    {
1170    int yield = 0;
1171    
1172    while (length-- > 0)
1173      {
1174      int c = *p++ & 0xffff;
1175    #if !defined NOUTF8
1176      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1177      {      {
1178      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1179      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)
1180          {
1181          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1182          length--;
1183          p++;
1184          }
1185      }      }
1186    #endif
1187      yield += pchar(c, f);
1188    }    }
1189    
1190  return yield;  return yield;
1191  }  }
1192    #endif
1193    
1194    
1195    
# Line 269  data is not zero. */ Line 1204  data is not zero. */
1204  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
1205  {  {
1206  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
1207  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
1208    
1209  if (callout_extra)  if (callout_extra)
1210    {    {
# Line 283  if (callout_extra) Line 1218  if (callout_extra)
1218      else      else
1219        {        {
1220        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1221        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject + cb->offset_vector[i],
1222          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1223        fprintf(f, "\n");        fprintf(f, "\n");
1224        }        }
# Line 296  printed lengths of the substrings. */ Line 1231  printed lengths of the substrings. */
1231    
1232  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1233    
1234  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, cb->start_match, f);
1235  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject + cb->start_match,
1236    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1237    
1238  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1239    
1240    PCHARSV(cb->subject + cb->current_position,
1241    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1242    
1243  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
1244    
1245  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
1246  shown */  shown. For automatic callouts, show the pattern offset. */
1247    
1248  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
1249    else fprintf(outfile, "%3d ", cb->callout_number);    {
1250      fprintf(outfile, "%+3d ", cb->pattern_position);
1251      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1252      }
1253    else
1254      {
1255      if (callout_extra) fprintf(outfile, "    ");
1256        else fprintf(outfile, "%3d ", cb->callout_number);
1257      }
1258    
1259  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1260  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 320  if (post_start > 0) Line 1265  if (post_start > 0)
1265    fprintf(outfile, "^");    fprintf(outfile, "^");
1266    }    }
1267    
1268    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1269      fprintf(outfile, " ");
1270    
1271    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1272      pbuffer + cb->pattern_position);
1273    
1274  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1275  first_callout = 0;  first_callout = 0;
1276    
1277  if (cb->callout_data != NULL)  if (cb->mark != last_callout_mark)
1278    {    {
1279    int callout_data = *((int *)(cb->callout_data));    fprintf(outfile, "Latest Mark: %s\n",
1280    if (callout_data != 0)      (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1281      last_callout_mark = cb->mark;
1282      }
1283    
1284    if (cb->callout_data != NULL)
1285      {
1286      int callout_data = *((int *)(cb->callout_data));
1287      if (callout_data != 0)
1288        {
1289        fprintf(outfile, "Callout data = %d\n", callout_data);
1290        return callout_data;
1291        }
1292      }
1293    
1294    return (cb->callout_number != callout_fail_id)? 0 :
1295           (++callout_count >= callout_fail_count)? 1 : 0;
1296    }
1297    
1298    
1299    /*************************************************
1300    *            Local malloc functions              *
1301    *************************************************/
1302    
1303    /* Alternative malloc function, to test functionality and save the size of a
1304    compiled re, which is the first store request that pcre_compile() makes. The
1305    show_malloc variable is set only during matching. */
1306    
1307    static void *new_malloc(size_t size)
1308    {
1309    void *block = malloc(size);
1310    gotten_store = size;
1311    if (first_gotten_store == 0) first_gotten_store = size;
1312    if (show_malloc)
1313      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1314    return block;
1315    }
1316    
1317    static void new_free(void *block)
1318    {
1319    if (show_malloc)
1320      fprintf(outfile, "free             %p\n", block);
1321    free(block);
1322    }
1323    
1324    /* For recursion malloc/free, to test stacking calls */
1325    
1326    static void *stack_malloc(size_t size)
1327    {
1328    void *block = malloc(size);
1329    if (show_malloc)
1330      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1331    return block;
1332    }
1333    
1334    static void stack_free(void *block)
1335    {
1336    if (show_malloc)
1337      fprintf(outfile, "stack_free       %p\n", block);
1338    free(block);
1339    }
1340    
1341    
1342    /*************************************************
1343    *          Call pcre_fullinfo()                  *
1344    *************************************************/
1345    
1346    /* Get one piece of information from the pcre_fullinfo() function. When only
1347    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1348    value, but the code is defensive. */
1349    
1350    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1351    {
1352    int rc;
1353    
1354    if (use_pcre16)
1355    #ifdef SUPPORT_PCRE16
1356      rc = pcre16_fullinfo(re, study, option, ptr);
1357    #else
1358      rc = PCRE_ERROR_BADMODE;
1359    #endif
1360    else
1361    #ifdef SUPPORT_PCRE8
1362      rc = pcre_fullinfo(re, study, option, ptr);
1363    #else
1364      rc = PCRE_ERROR_BADMODE;
1365    #endif
1366    
1367    if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1368      use_pcre16? "16" : "", option);
1369    }
1370    
1371    
1372    
1373    /*************************************************
1374    *             Swap byte functions                *
1375    *************************************************/
1376    
1377    /* The following functions swap the bytes of a pcre_uint16
1378    and pcre_uint32 value.
1379    
1380    Arguments:
1381      value        any number
1382    
1383    Returns:       the byte swapped value
1384    */
1385    
1386    static pcre_uint32
1387    swap_uint32(pcre_uint32 value)
1388    {
1389    return ((value & 0x000000ff) << 24) |
1390           ((value & 0x0000ff00) <<  8) |
1391           ((value & 0x00ff0000) >>  8) |
1392           (value >> 24);
1393    }
1394    
1395    static pcre_uint16
1396    swap_uint16(pcre_uint16 value)
1397    {
1398    return (value >> 8) | (value << 8);
1399    }
1400    
1401    
1402    
1403    /*************************************************
1404    *        Flip bytes in a compiled pattern        *
1405    *************************************************/
1406    
1407    /* This function is called if the 'F' option was present on a pattern that is
1408    to be written to a file. We flip the bytes of all the integer fields in the
1409    regex data block and the study block. In 16-bit mode this also flips relevant
1410    bytes in the pattern itself. This is to make it possible to test PCRE's
1411    ability to reload byte-flipped patterns, e.g. those compiled on a different
1412    architecture. */
1413    
1414    static void
1415    regexflip(pcre *ere, pcre_extra *extra)
1416    {
1417    real_pcre *re = (real_pcre *)ere;
1418    int op;
1419    
1420    #ifdef SUPPORT_PCRE16
1421    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1422    int length = re->name_count * re->name_entry_size;
1423    #ifdef SUPPORT_UTF
1424    BOOL utf = (re->options & PCRE_UTF16) != 0;
1425    BOOL utf16_char = FALSE;
1426    #endif /* SUPPORT_UTF */
1427    #endif /* SUPPORT_PCRE16 */
1428    
1429    /* Always flip the bytes in the main data block and study blocks. */
1430    
1431    re->magic_number = REVERSED_MAGIC_NUMBER;
1432    re->size = swap_uint32(re->size);
1433    re->options = swap_uint32(re->options);
1434    re->flags = swap_uint16(re->flags);
1435    re->top_bracket = swap_uint16(re->top_bracket);
1436    re->top_backref = swap_uint16(re->top_backref);
1437    re->first_char = swap_uint16(re->first_char);
1438    re->req_char = swap_uint16(re->req_char);
1439    re->name_table_offset = swap_uint16(re->name_table_offset);
1440    re->name_entry_size = swap_uint16(re->name_entry_size);
1441    re->name_count = swap_uint16(re->name_count);
1442    
1443    if (extra != NULL)
1444      {
1445      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1446      rsd->size = swap_uint32(rsd->size);
1447      rsd->flags = swap_uint32(rsd->flags);
1448      rsd->minlength = swap_uint32(rsd->minlength);
1449      }
1450    
1451    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1452    in the name table, if present, and then in the pattern itself. */
1453    
1454    #ifdef SUPPORT_PCRE16
1455    if (!use_pcre16) return;
1456    
1457    while(TRUE)
1458      {
1459      /* Swap previous characters. */
1460      while (length-- > 0)
1461        {
1462        *ptr = swap_uint16(*ptr);
1463        ptr++;
1464        }
1465    #ifdef SUPPORT_UTF
1466      if (utf16_char)
1467        {
1468        if ((ptr[-1] & 0xfc00) == 0xd800)
1469          {
1470          /* We know that there is only one extra character in UTF-16. */
1471          *ptr = swap_uint16(*ptr);
1472          ptr++;
1473          }
1474        }
1475      utf16_char = FALSE;
1476    #endif /* SUPPORT_UTF */
1477    
1478      /* Get next opcode. */
1479    
1480      length = 0;
1481      op = *ptr;
1482      *ptr++ = swap_uint16(op);
1483    
1484      switch (op)
1485        {
1486        case OP_END:
1487        return;
1488    
1489        case OP_CHAR:
1490        case OP_CHARI:
1491        case OP_NOT:
1492        case OP_NOTI:
1493        case OP_STAR:
1494        case OP_MINSTAR:
1495        case OP_PLUS:
1496        case OP_MINPLUS:
1497        case OP_QUERY:
1498        case OP_MINQUERY:
1499        case OP_UPTO:
1500        case OP_MINUPTO:
1501        case OP_EXACT:
1502        case OP_POSSTAR:
1503        case OP_POSPLUS:
1504        case OP_POSQUERY:
1505        case OP_POSUPTO:
1506        case OP_STARI:
1507        case OP_MINSTARI:
1508        case OP_PLUSI:
1509        case OP_MINPLUSI:
1510        case OP_QUERYI:
1511        case OP_MINQUERYI:
1512        case OP_UPTOI:
1513        case OP_MINUPTOI:
1514        case OP_EXACTI:
1515        case OP_POSSTARI:
1516        case OP_POSPLUSI:
1517        case OP_POSQUERYI:
1518        case OP_POSUPTOI:
1519        case OP_NOTSTAR:
1520        case OP_NOTMINSTAR:
1521        case OP_NOTPLUS:
1522        case OP_NOTMINPLUS:
1523        case OP_NOTQUERY:
1524        case OP_NOTMINQUERY:
1525        case OP_NOTUPTO:
1526        case OP_NOTMINUPTO:
1527        case OP_NOTEXACT:
1528        case OP_NOTPOSSTAR:
1529        case OP_NOTPOSPLUS:
1530        case OP_NOTPOSQUERY:
1531        case OP_NOTPOSUPTO:
1532        case OP_NOTSTARI:
1533        case OP_NOTMINSTARI:
1534        case OP_NOTPLUSI:
1535        case OP_NOTMINPLUSI:
1536        case OP_NOTQUERYI:
1537        case OP_NOTMINQUERYI:
1538        case OP_NOTUPTOI:
1539        case OP_NOTMINUPTOI:
1540        case OP_NOTEXACTI:
1541        case OP_NOTPOSSTARI:
1542        case OP_NOTPOSPLUSI:
1543        case OP_NOTPOSQUERYI:
1544        case OP_NOTPOSUPTOI:
1545    #ifdef SUPPORT_UTF
1546        if (utf) utf16_char = TRUE;
1547    #endif
1548        length = OP_lengths16[op] - 1;
1549        break;
1550    
1551        case OP_CLASS:
1552        case OP_NCLASS:
1553        /* Skip the character bit map. */
1554        ptr += 32/sizeof(pcre_uint16);
1555        length = 0;
1556        break;
1557    
1558        case OP_XCLASS:
1559        /* Reverse the size of the XCLASS instance. */
1560        ptr++;
1561        *ptr = swap_uint16(*ptr);
1562        if (LINK_SIZE > 1)
1563          {
1564          /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1565          ptr++;
1566          *ptr = swap_uint16(*ptr);
1567          }
1568        ptr++;
1569    
1570        if (LINK_SIZE > 1)
1571          length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1572            (1 + LINK_SIZE + 1);
1573        else
1574          length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1575    
1576        op = *ptr;
1577        *ptr = swap_uint16(op);
1578        if ((op & XCL_MAP) != 0)
1579          {
1580          /* Skip the character bit map. */
1581          ptr += 32/sizeof(pcre_uint16);
1582          length -= 32/sizeof(pcre_uint16);
1583          }
1584        break;
1585    
1586        default:
1587        length = OP_lengths16[op] - 1;
1588        break;
1589        }
1590      }
1591    /* Control should never reach here in 16 bit mode. */
1592    #endif /* SUPPORT_PCRE16 */
1593    }
1594    
1595    
1596    
1597    /*************************************************
1598    *        Check match or recursion limit          *
1599    *************************************************/
1600    
1601    static int
1602    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1603      int start_offset, int options, int *use_offsets, int use_size_offsets,
1604      int flag, unsigned long int *limit, int errnumber, const char *msg)
1605    {
1606    int count;
1607    int min = 0;
1608    int mid = 64;
1609    int max = -1;
1610    
1611    extra->flags |= flag;
1612    
1613    for (;;)
1614      {
1615      *limit = mid;
1616    
1617      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1618        use_offsets, use_size_offsets);
1619    
1620      if (count == errnumber)
1621        {
1622        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1623        min = mid;
1624        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1625        }
1626    
1627      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1628                             count == PCRE_ERROR_PARTIAL)
1629      {      {
1630      fprintf(outfile, "Callout data = %d\n", callout_data);      if (mid == min + 1)
1631      return callout_data;        {
1632          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1633          break;
1634          }
1635        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1636        max = mid;
1637        mid = (min + mid)/2;
1638      }      }
1639      else break;    /* Some other error */
1640    }    }
1641    
1642  return (cb->callout_number != callout_fail_id)? 0 :  extra->flags &= ~flag;
1643         (++callout_count >= callout_fail_count)? 1 : 0;  return count;
1644  }  }
1645    
1646    
1647    
1648  /*************************************************  /*************************************************
1649  *            Local malloc functions              *  *         Case-independent strncmp() function    *
1650  *************************************************/  *************************************************/
1651    
1652  /* Alternative malloc function, to test functionality and show the size of the  /*
1653  compiled re. */  Arguments:
1654      s         first string
1655      t         second string
1656      n         number of characters to compare
1657    
1658  static void *new_malloc(size_t size)  Returns:    < 0, = 0, or > 0, according to the comparison
1659  {  */
 void *block = malloc(size);  
 gotten_store = size;  
 if (show_malloc)  
   fprintf(outfile, "malloc       %3d %p\n", size, block);  
 return block;  
 }  
1660    
1661  static void new_free(void *block)  static int
1662    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1663  {  {
1664  if (show_malloc)  while (n--)
1665    fprintf(outfile, "free             %p\n", block);    {
1666  free(block);    int c = tolower(*s++) - tolower(*t++);
1667      if (c) return c;
1668      }
1669    return 0;
1670  }  }
1671    
1672    
 /* For recursion malloc/free, to test stacking calls */  
1673    
1674  static void *stack_malloc(size_t size)  /*************************************************
1675  {  *         Check newline indicator                *
1676  void *block = malloc(size);  *************************************************/
 if (show_malloc)  
   fprintf(outfile, "stack_malloc %3d %p\n", size, block);  
 return block;  
 }  
1677    
1678  static void stack_free(void *block)  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1679    a message and return 0 if there is no match.
1680    
1681    Arguments:
1682      p           points after the leading '<'
1683      f           file for error message
1684    
1685    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1686    */
1687    
1688    static int
1689    check_newline(pcre_uint8 *p, FILE *f)
1690  {  {
1691  if (show_malloc)  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1692    fprintf(outfile, "stack_free       %p\n", block);  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1693  free(block);  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1694    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1695    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1696    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1697    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1698    fprintf(f, "Unknown newline type at: <%s\n", p);
1699    return 0;
1700  }  }
1701    
1702    
1703    
1704  /*************************************************  /*************************************************
1705  *          Call pcre_fullinfo()                  *  *             Usage function                     *
1706  *************************************************/  *************************************************/
1707    
/* Write the command-line help text to stdout. Lines describing features that
can be left out when the program is built (readline, the 16-bit interface, DFA
matching, the POSIX interface) are selected by the corresponding preprocessor
symbols. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs("  -16      use 16-bit interface\n", stdout);
#endif

fputs("  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       force each pattern to be studied at basic level\n"
      "  -s+      force each pattern to be studied, using JIT if available\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
1745    
1746    
# Line 408  int main(int argc, char **argv) Line 1758  int main(int argc, char **argv)
1758  FILE *infile = stdin;  FILE *infile = stdin;
1759  int options = 0;  int options = 0;
1760  int study_options = 0;  int study_options = 0;
1761    int default_find_match_limit = FALSE;
1762  int op = 1;  int op = 1;
1763  int timeit = 0;  int timeit = 0;
1764    int timeitm = 0;
1765  int showinfo = 0;  int showinfo = 0;
1766  int showstore = 0;  int showstore = 0;
1767    int force_study = -1;
1768    int force_study_options = 0;
1769    int quiet = 0;
1770  int size_offsets = 45;  int size_offsets = 45;
1771  int size_offsets_max;  int size_offsets_max;
1772  int *offsets;  int *offsets = NULL;
1773  #if !defined NOPOSIX  #if !defined NOPOSIX
1774  int posix = 0;  int posix = 0;
1775  #endif  #endif
1776  int debug = 0;  int debug = 0;
1777  int done = 0;  int done = 0;
1778    int all_use_dfa = 0;
1779    int yield = 0;
1780    int stack_size;
1781    
1782    pcre_jit_stack *jit_stack = NULL;
1783    
1784    /* These vectors store, end-to-end, a list of captured substring names. Assume
1785    that 1024 is plenty long enough for the few names we'll be testing. */
1786    
1787  unsigned char *buffer;  pcre_uchar copynames[1024];
1788  unsigned char *dbuffer;  pcre_uchar getnames[1024];
1789    
1790  /* Get buffers from malloc() so that Electric Fence will check their misuse  pcre_uchar *copynamesptr;
1791  when I am debugging. */  pcre_uchar *getnamesptr;
1792    
1793  buffer = (unsigned char *)malloc(BUFFER_SIZE);  /* Get buffers from malloc() so that valgrind will check their misuse when
1794  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  debugging. They grow automatically when very long lines are read. The 16-bit
1795    buffer (buffer16) is obtained only if needed. */
1796    
1797  /* Static so that new_malloc can use it. */  buffer = (pcre_uint8 *)malloc(buffer_size);
1798    dbuffer = (pcre_uint8 *)malloc(buffer_size);
1799    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1800    
1801    /* The outfile variable is static so that new_malloc can use it. */
1802    
1803  outfile = stdout;  outfile = stdout;
1804    
1805    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1806    library to translate CRLF into a single LF character. At least, that's what
1807    I've been told: never having used Windows I take this all on trust. Originally
1808    it set 0x8000, but then I was advised that _O_BINARY was better. */
1809    
1810    #if defined(_WIN32) || defined(WIN32)
1811    _setmode( _fileno( stdout ), _O_BINARY );
1812    #endif
1813    
1814  /* Scan options */  /* Scan options */
1815    
1816  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1817    {    {
1818    unsigned char *endptr;    pcre_uint8 *endptr;
1819    
1820      if (strcmp(argv[op], "-m") == 0) showstore = 1;
1821      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1822      else if (strcmp(argv[op], "-s+") == 0)
1823        {
1824        force_study = 1;
1825        force_study_options = PCRE_STUDY_JIT_COMPILE;
1826        }
1827    #ifdef SUPPORT_PCRE16
1828      else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1829    #endif
1830    
1831    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1832      showstore = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
   else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
1833    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1834    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1835      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1836    #if !defined NODFA
1837      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1838    #endif
1839    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1840        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1841            *endptr == 0))
1842        {
1843        op++;
1844        argc--;
1845        }
1846      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1847        {
1848        int both = argv[op][2] == 0;
1849        int temp;
1850        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1851                         *endptr == 0))
1852          {
1853          timeitm = temp;
1854          op++;
1855          argc--;
1856          }
1857        else timeitm = LOOPREPEAT;
1858        if (both) timeit = timeitm;
1859        }
1860      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1861          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1862          *endptr == 0))          *endptr == 0))
1863      {      {
1864    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1865        printf("PCRE: -S not supported on this OS\n");
1866        exit(1);
1867    #else
1868        int rc;
1869        struct rlimit rlim;
1870        getrlimit(RLIMIT_STACK, &rlim);
1871        rlim.rlim_cur = stack_size * 1024 * 1024;
1872        rc = setrlimit(RLIMIT_STACK, &rlim);
1873        if (rc != 0)
1874          {
1875        printf("PCRE: setrlimit() failed with error %d\n", rc);
1876        exit(1);
1877          }
1878      op++;      op++;
1879      argc--;      argc--;
1880    #endif
1881      }      }
1882  #if !defined NOPOSIX  #if !defined NOPOSIX
1883    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
# Line 458  while (argc > 1 && argv[op][0] == '-') Line 1885  while (argc > 1 && argv[op][0] == '-')
1885    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1886      {      {
1887      int rc;      int rc;
1888        unsigned long int lrc;
1889      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1890      printf("Compiled with\n");      printf("Compiled with\n");
1891    
1892    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1893    are set, either both UTFs are supported or both are not supported. */
1894    
1895    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1896        printf("  8-bit and 16-bit support\n");
1897        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1898        if (rc)
1899          printf("  UTF-8 and UTF-16 support\n");
1900        else
1901          printf("  No UTF-8 or UTF-16 support\n");
1902    #elif defined SUPPORT_PCRE8
1903        printf("  8-bit support only\n");
1904      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1905      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1906    #else
1907        printf("  16-bit support only\n");
1908        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1909        printf("  %sUTF-16 support\n", rc? "" : "No ");
1910    #endif
1911    
1912        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1913        printf("  %sUnicode properties support\n", rc? "" : "No ");
1914        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1915        if (rc)
1916          printf("  Just-in-time compiler support\n");
1917        else
1918          printf("  No just-in-time compiler support\n");
1919      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1920      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      /* Note that these values are always the ASCII values, even
1921        in EBCDIC environments. CR is 13 and NL is 10. */
1922        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1923          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1924          (rc == -2)? "ANYCRLF" :
1925          (rc == -1)? "ANY" : "???");
1926        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1927        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1928                                         "all Unicode newlines");
1929      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1930      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1931      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1932      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1933      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1934      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1935        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1936        printf("  Default recursion depth limit = %ld\n", lrc);
1937      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1938      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1939      exit(0);      goto EXIT;
1940        }
1941      else if (strcmp(argv[op], "-help") == 0 ||
1942               strcmp(argv[op], "--help") == 0)
1943        {
1944        usage();
1945        goto EXIT;
1946      }      }
1947    else    else
1948      {      {
1949      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1950      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1951      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
1952      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
1953      }      }
1954    op++;    op++;
1955    argc--;    argc--;
# Line 500  offsets = (int *)malloc(size_offsets_max Line 1962  offsets = (int *)malloc(size_offsets_max
1962  if (offsets == NULL)  if (offsets == NULL)
1963    {    {
1964    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1965      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1966    return 1;    yield = 1;
1967      goto EXIT;
1968    }    }
1969    
1970  /* Sort out the input and output files */  /* Sort out the input and output files */
1971    
1972  if (argc > 1)  if (argc > 1)
1973    {    {
1974    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
1975    if (infile == NULL)    if (infile == NULL)
1976      {      {
1977      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1978      return 1;      yield = 1;
1979        goto EXIT;
1980      }      }
1981    }    }
1982    
1983  if (argc > 2)  if (argc > 2)
1984    {    {
1985    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1986    if (outfile == NULL)    if (outfile == NULL)
1987      {      {
1988      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1989      return 1;      yield = 1;
1990        goto EXIT;
1991      }      }
1992    }    }
1993    
1994  /* Set alternative malloc function */  /* Set alternative malloc function */
1995    
1996    #ifdef SUPPORT_PCRE8
1997  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1998  pcre_free = new_free;  pcre_free = new_free;
1999  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2000  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2001    #endif
2002    
2003    #ifdef SUPPORT_PCRE16
2004    pcre16_malloc = new_malloc;
2005    pcre16_free = new_free;
2006    pcre16_stack_malloc = stack_malloc;
2007    pcre16_stack_free = stack_free;
2008    #endif
2009    
2010  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2011    
2012  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
2013    
2014  /* Main loop */  /* Main loop */
2015    
# Line 550  while (!done) Line 2024  while (!done)
2024  #endif  #endif
2025    
2026    const char *error;    const char *error;
2027    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2028    const unsigned char *tables = NULL;    pcre_uint8 *p, *pp, *ppp;
2029      pcre_uint8 *to_file = NULL;
2030      const pcre_uint8 *tables = NULL;
2031      unsigned long int true_size, true_study_size = 0;
2032      size_t size, regex_gotten_store;
2033      int do_allcaps = 0;
2034      int do_mark = 0;
2035    int do_study = 0;    int do_study = 0;
2036      int no_force_study = 0;
2037    int do_debug = debug;    int do_debug = debug;
2038    int do_G = 0;    int do_G = 0;
2039    int do_g = 0;    int do_g = 0;
2040    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2041    int do_showrest = 0;    int do_showrest = 0;
2042    int erroroffset, len, delimiter;    int do_showcaprest = 0;
2043      int do_flip = 0;
2044      int erroroffset, len, delimiter, poffset;
2045    
2046    use_utf8 = 0;    use_utf = 0;
2047      debug_lengths = 1;
2048    
2049    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
2050    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2051    fflush(outfile);    fflush(outfile);
2052    
# Line 571  while (!done) Line 2054  while (!done)
2054    while (isspace(*p)) p++;    while (isspace(*p)) p++;
2055    if (*p == 0) continue;    if (*p == 0) continue;
2056    
2057    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
2058    complete, read more. */  
2059      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2060        {
2061        unsigned long int magic, get_options;
2062        pcre_uint8 sbuf[8];
2063        FILE *f;
2064    
2065        p++;
2066        pp = p + (int)strlen((char *)p);
2067        while (isspace(pp[-1])) pp--;
2068        *pp = 0;
2069    
2070        f = fopen((char *)p, "rb");
2071        if (f == NULL)
2072          {
2073          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2074          continue;
2075          }
2076    
2077        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2078    
2079        true_size =
2080          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2081        true_study_size =
2082          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2083    
2084        re = (real_pcre *)new_malloc(true_size);
2085        regex_gotten_store = first_gotten_store;
2086    
2087        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2088    
2089        magic = ((real_pcre *)re)->magic_number;
2090        if (magic != MAGIC_NUMBER)
2091          {
2092          if (swap_uint32(magic) == MAGIC_NUMBER)
2093            {
2094            do_flip = 1;
2095            }
2096          else
2097            {
2098            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2099            fclose(f);
2100            continue;
2101            }
2102          }
2103    
2104        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2105          do_flip? " (byte-inverted)" : "", p);
2106    
2107        /* Now see if there is any following study data. */
2108    
2109        if (true_study_size != 0)
2110          {
2111          pcre_study_data *psd;
2112    
2113          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2114          extra->flags = PCRE_EXTRA_STUDY_DATA;
2115    
2116          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2117          extra->study_data = psd;
2118    
2119          if (fread(psd, 1, true_study_size, f) != true_study_size)
2120            {
2121            FAIL_READ:
2122            fprintf(outfile, "Failed to read data from %s\n", p);
2123            if (extra != NULL)
2124              {
2125              PCRE_FREE_STUDY(extra);
2126              }
2127            if (re != NULL) new_free(re);
2128            fclose(f);
2129            continue;
2130            }
2131          fprintf(outfile, "Study data loaded from %s\n", p);
2132          do_study = 1;     /* To get the data output if requested */
2133          }
2134        else fprintf(outfile, "No study data\n");
2135    
2136        /* Flip the necessary bytes. */
2137        if (do_flip)
2138          {
2139          PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2140          }
2141    
2142        /* Need to know if UTF-8 for printing data strings */
2143    
2144        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2145        use_utf = (get_options & PCRE_UTF8) != 0;
2146    
2147        fclose(f);
2148        goto SHOW_INFO;
2149        }
2150    
2151      /* In-line pattern (the usual case). Get the delimiter and seek the end of
2152      the pattern; if it isn't complete, read more. */
2153    
2154    delimiter = *p++;    delimiter = *p++;
2155    
2156    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2157      {      {
2158      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2159      goto SKIP_DATA;      goto SKIP_DATA;
2160      }      }
2161    
2162    pp = p;    pp = p;
2163      poffset = (int)(p - buffer);
2164    
2165    for(;;)    for(;;)
2166      {      {
# Line 593  while (!done) Line 2171  while (!done)
2171        pp++;        pp++;
2172        }        }
2173      if (*pp != 0) break;      if (*pp != 0) break;
2174        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
2175        {        {
2176        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2177        done = 1;        done = 1;
# Line 611  while (!done) Line 2180  while (!done)
2180      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2181      }      }
2182    
2183      /* The buffer may have moved while being extended; reset the start of data
2184      pointer to the correct relative point in the buffer. */
2185    
2186      p = buffer + poffset;
2187    
2188    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
2189    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
2190    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
2191    
2192    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
2193    
2194    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
2195      for callouts. */
2196    
2197    *pp++ = 0;    *pp++ = 0;
2198      strcpy((char *)pbuffer, (char *)p);
2199    
2200    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2201    
# Line 631  while (!done) Line 2207  while (!done)
2207      {      {
2208      switch (*pp++)      switch (*pp++)
2209        {        {
2210          case 'f': options |= PCRE_FIRSTLINE; break;
2211        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
2212        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
2213        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
2214        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2215        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2216    
2217        case '+': do_showrest = 1; break;        case '+':
2218          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2219          break;
2220    
2221          case '=': do_allcaps = 1; break;
2222        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2223          case 'B': do_debug = 1; break;
2224          case 'C': options |= PCRE_AUTO_CALLOUT; break;
2225        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
2226        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2227          case 'F': do_flip = 1; break;
2228        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2229        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2230          case 'J': options |= PCRE_DUPNAMES; break;
2231          case 'K': do_mark = 1; break;
2232        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2233        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2234    
# Line 650  while (!done) Line 2236  while (!done)
2236        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2237  #endif  #endif
2238    
2239        case 'S': do_study = 1; break;        case 'S':
2240          if (do_study == 0)
2241            {
2242            do_study = 1;
2243            if (*pp == '+')
2244              {
2245              study_options |= PCRE_STUDY_JIT_COMPILE;
2246              pp++;
2247              }
2248            }
2249          else
2250            {
2251            do_study = 0;
2252            no_force_study = 1;
2253            }
2254          break;
2255    
2256        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2257          case 'W': options |= PCRE_UCP; break;
2258        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2259        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2260          case 'Z': debug_lengths = 0; break;
2261          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2262        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2263    
2264          case 'T':
2265          switch (*pp++)
2266            {
2267            case '0': tables = tables0; break;
2268            case '1': tables = tables1; break;
2269    
2270            case '\r':
2271            case '\n':
2272            case ' ':
2273            case 0:
2274            fprintf(outfile, "** Missing table number after /T\n");
2275            goto SKIP_DATA;
2276    
2277            default:
2278            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2279            goto SKIP_DATA;
2280            }
2281          break;
2282    
2283        case 'L':        case 'L':
2284        ppp = pp;        ppp = pp;
2285        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
2286          /* The '0' test is just in case this is an unterminated line. */
2287          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2288        *ppp = 0;        *ppp = 0;
2289        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2290          {          {
2291          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2292          goto SKIP_DATA;          goto SKIP_DATA;
2293          }          }
2294          locale_set = 1;
2295        tables = pcre_maketables();        tables = pcre_maketables();
2296        pp = ppp;        pp = ppp;
2297        break;        break;
2298    
2299        case '\n': case ' ': break;        case '>':
2300          to_file = pp;
2301          while (*pp != 0) pp++;
2302          while (isspace(pp[-1])) pp--;
2303          *pp = 0;
2304          break;
2305    
2306          case '<':
2307            {
2308            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2309              {
2310              options |= PCRE_JAVASCRIPT_COMPAT;
2311              pp += 3;
2312              }
2313            else
2314              {
2315              int x = check_newline(pp, outfile);
2316              if (x == 0) goto SKIP_DATA;
2317              options |= x;
2318              while (*pp++ != '>');
2319              }
2320            }
2321          break;
2322    
2323          case '\r':                      /* So that it works in Windows */
2324          case '\n':
2325          case ' ':
2326          break;
2327    
2328        default:        default:
2329        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2330        goto SKIP_DATA;        goto SKIP_DATA;
# Line 678  while (!done) Line 2333  while (!done)
2333    
2334    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2335    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2336    local character tables. */    local character tables. Neither does it have 16-bit support. */
2337    
2338  #if !defined NOPOSIX  #if !defined NOPOSIX
2339    if (posix || do_posix)    if (posix || do_posix)
2340      {      {
2341      int rc;      int rc;
2342      int cflags = 0;      int cflags = 0;
2343    
2344      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2345      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2346        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2347        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2348        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2349        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2350        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2351    
2352        first_gotten_store = 0;
2353      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2354    
2355      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 694  while (!done) Line 2357  while (!done)
2357    
2358      if (rc != 0)      if (rc != 0)
2359        {        {
2360        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2361        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2362        goto SKIP_DATA;        goto SKIP_DATA;
2363        }        }
# Line 706  while (!done) Line 2369  while (!done)
2369  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2370    
2371      {      {
2372      if (timeit)      unsigned long int get_options;
2373    
2374        /* In 16-bit mode, convert the input. */
2375    
2376    #ifdef SUPPORT_PCRE16
2377        if (use_pcre16)
2378          {
2379          if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2380            {
2381            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2382              "converted to UTF-16\n");
2383            goto SKIP_DATA;
2384            }
2385          p = (pcre_uint8 *)buffer16;
2386          }
2387    #endif
2388    
2389        /* Compile many times when timing */
2390    
2391        if (timeit > 0)
2392        {        {
2393        register int i;        register int i;
2394        clock_t time_taken;        clock_t time_taken;
2395        clock_t start_time = clock();        clock_t start_time = clock();
2396        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
2397          {          {
2398          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2399          if (re != NULL) free(re);          if (re != NULL) free(re);
2400          }          }
2401        time_taken = clock() - start_time;        time_taken = clock() - start_time;
2402        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
2403          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
2404            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2405        }        }
2406    
2407      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2408        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2409    
2410      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2411      if non-interactive. */      if non-interactive. */
# Line 735  while (!done) Line 2418  while (!done)
2418          {          {
2419          for (;;)          for (;;)
2420            {            {
2421            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2422              {              {
2423              done = 1;              done = 1;
2424              goto CONTINUE;              goto CONTINUE;
# Line 749  while (!done) Line 2432  while (!done)
2432        goto CONTINUE;        goto CONTINUE;
2433        }        }
2434    
2435      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2436      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
2437      returns only limited data. Check that it agrees with the newer one. */      lines. */
2438    
2439        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2440        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2441    
2442        /* Extract the size for possible writing before possibly flipping it,
2443        and remember the store that was got. */
2444    
2445        true_size = ((real_pcre *)re)->size;
2446        regex_gotten_store = first_gotten_store;
2447    
2448        /* Output code size information if requested */
2449    
2450      if (log_store)      if (log_store)
2451        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
2452          (int)(gotten_store -          (int)(first_gotten_store -
2453                sizeof(real_pcre) -                sizeof(real_pcre) -
2454                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2455    
2456      if (do_showinfo)      /* If -s or /S was present, study the regex to generate additional info to
2457        help with the matching, unless the pattern has the SS option, which
2458        suppresses the effect of /S (used for a few test patterns where studying is
2459        never sensible). */
2460    
2461        if (do_study || (force_study >= 0 && !no_force_study))
2462        {        {
2463        unsigned long int get_options;        if (timeit > 0)
2464        int old_first_char, old_options, old_count;          {
2465        int count, backrefmax, first_char, need_char;          register int i;
2466        int nameentrysize, namecount;          clock_t time_taken;
2467        const uschar *nametable;          clock_t start_time = clock();
2468        size_t size;          for (i = 0; i < timeit; i++)
2469              {
2470              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2471              }
2472            time_taken = clock() - start_time;
2473            if (extra != NULL)
2474              {
2475              PCRE_FREE_STUDY(extra);
2476              }
2477            fprintf(outfile, "  Study time %.4f milliseconds\n",
2478              (((double)time_taken * 1000.0) / (double)timeit) /
2479                (double)CLOCKS_PER_SEC);
2480            }
2481          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2482          if (error != NULL)
2483            fprintf(outfile, "Failed to study: %s\n", error);
2484          else if (extra != NULL)
2485            {
2486            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2487            if (log_store)
2488              {
2489              size_t jitsize;
2490              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2491              if (jitsize != 0)
2492                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2493              }
2494            }
2495          }
2496    
2497        /* If /K was present, we set up for handling MARK data. */
2498    
2499        if (do_debug)      if (do_mark)
2500          {
2501          if (extra == NULL)
2502          {          {
2503          fprintf(outfile, "------------------------------------------------------------------\n");          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2504          print_internals(re, outfile);          extra->flags = 0;
2505          }          }
2506          extra->mark = &markptr;
2507          extra->flags |= PCRE_EXTRA_MARK;
2508          }
2509    
2510        /* Extract and display information from the compiled data if required. */
2511    
2512        SHOW_INFO:
2513    
2514        if (do_debug)
2515          {
2516          fprintf(outfile, "------------------------------------------------------------------\n");
2517    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2518          if (use_pcre16)
2519            pcre16_printint(re, outfile, debug_lengths);
2520          else
2521            pcre_printint(re, outfile, debug_lengths);
2522    #elif defined SUPPORT_PCRE8
2523          pcre_printint(re, outfile, debug_lengths);
2524    #else
2525          pcre16_printint(re, outfile, debug_lengths);
2526    #endif
2527          }
2528    
2529        /* We already have the options in get_options (see above) */
2530    
2531        if (do_showinfo)
2532          {
2533          unsigned long int all_options;
2534    #if !defined NOINFOCHECK
2535          int old_first_char, old_options, old_count;
2536    #endif
2537          int count, backrefmax, first_char, need_char, okpartial, jchanged,
2538            hascrorlf;
2539          int nameentrysize, namecount;
2540          const pcre_uchar *nametable;
2541    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
2542        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
2543        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2544        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 783  while (!done) Line 2547  while (!done)
2547        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2548        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2549        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2550          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2551        old_count = pcre_info(re, &old_options, &old_first_char);        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2552        if (count < 0) fprintf(outfile,        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2553          "Error %d from pcre_info()\n", count);  
2554        else        /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2555          {        that it gives the same results as the new function. */
2556          if (old_count != count) fprintf(outfile,  
2557            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  #if !defined NOINFOCHECK
2558              old_count);        if (!use_pcre16)
2559            {
2560          if (old_first_char != first_char) fprintf(outfile,          old_count = pcre_info(re, &old_options, &old_first_char);
2561            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",          if (count < 0) fprintf(outfile,
2562              first_char, old_first_char);            "Error %d from pcre_info()\n", count);
2563            else
2564          if (old_options != (int)get_options) fprintf(outfile,            {
2565            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            if (old_count != count) fprintf(outfile,
2566              get_options, old_options);              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2567                  old_count);
2568    
2569              if (old_first_char != first_char) fprintf(outfile,
2570                "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2571                  first_char, old_first_char);
2572    
2573              if (old_options != (int)get_options) fprintf(outfile,
2574                "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2575                  get_options, old_options);
2576              }
2577          }          }
2578    #endif
2579    
2580        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
2581          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2582          size, gotten_store);          (int)size, (int)regex_gotten_store);
2583    
2584        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
2585        if (backrefmax > 0)        if (backrefmax > 0)
# Line 822  while (!done) Line 2597  while (!done)
2597            }            }
2598          }          }
2599    
2600          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2601          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2602    
2603          all_options = ((real_pcre *)re)->options;
2604          if (do_flip) all_options = swap_uint32(all_options);
2605    
2606        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2607          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2608            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2609            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2610            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2611            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2612              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2613            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2614              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2615              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2616            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2617            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2618            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2619            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2620            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2621              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2622              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2623              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2624              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2625    
2626          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2627    
2628          switch (get_options & PCRE_NEWLINE_BITS)
2629            {
2630            case PCRE_NEWLINE_CR:
2631            fprintf(outfile, "Forced newline sequence: CR\n");
2632            break;
2633    
2634            case PCRE_NEWLINE_LF:
2635            fprintf(outfile, "Forced newline sequence: LF\n");
2636            break;
2637    
2638        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_CRLF:
2639          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
2640            break;
2641    
2642            case PCRE_NEWLINE_ANYCRLF:
2643            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2644            break;
2645    
2646            case PCRE_NEWLINE_ANY:
2647            fprintf(outfile, "Forced newline sequence: ANY\n");
2648            break;
2649    
2650            default:
2651            break;
2652            }
2653    
2654        if (first_char == -1)        if (first_char == -1)
2655          {          {
2656          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
2657          }          }
2658        else if (first_char < 0)        else if (first_char < 0)
2659          {          {
# Line 848  while (!done) Line 2661  while (!done)
2661          }          }
2662        else        else
2663          {          {
2664          int ch = first_char & 255;          const char *caseless =
2665          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2666            "" : " (caseless)";            "" : " (caseless)";
2667          if (isprint(ch))  
2668            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
2669              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2670          else          else
2671            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
2672              fprintf(outfile, "First char = ");
2673              pchar(first_char, outfile);
2674              fprintf(outfile, "%s\n", caseless);
2675              }
2676          }          }
2677    
2678        if (need_char < 0)        if (need_char < 0)
# Line 863  while (!done) Line 2681  while (!done)
2681          }          }
2682        else        else
2683          {          {
2684          int ch = need_char & 255;          const char *caseless =
2685          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2686            "" : " (caseless)";            "" : " (caseless)";
         if (isprint(ch))  
           fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);  
         else  
           fprintf(outfile, "Need char = %d%s\n", ch, caseless);  
         }  
       }  
2687    
2688      /* If /S was present, study the regexp to generate additional info to          if (PRINTOK(need_char))
2689      help with the matching. */            fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2690            else
2691      if (do_study)            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
       {  
       if (timeit)  
         {  
         register int i;  
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /  
             (double)CLOCKS_PER_SEC);  
2692          }          }
2693    
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
2694        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2695        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2696        so messes up the test suite. */        so messes up the test suite. (And with the /F option, it might be
2697          flipped.) If study was forced by an external -s, don't show this
2698          information unless -i or -d was also present. This means that, except
2699          when auto-callouts are involved, the output from runs with and without
2700          -s should be identical. */
2701    
2702        else if (do_showinfo)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2703          {          {
2704          size_t size;          if (extra == NULL)
2705          uschar *start_bits = NULL;            fprintf(outfile, "Study returned NULL\n");
         new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);  
         new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);  
         /* fprintf(outfile, "Study size = %d\n", size); */  
         if (start_bits == NULL)  
           fprintf(outfile, "No starting character set\n");  
2706          else          else
2707            {            {
2708            int i;            pcre_uint8 *start_bits = NULL;
2709            int c = 24;            int minlength;
2710            fprintf(outfile, "Starting character set: ");  
2711            for (i = 0; i < 256; i++)            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2712              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2713    
2714              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2715              if (start_bits == NULL)
2716                fprintf(outfile, "No set of starting bytes\n");
2717              else
2718              {              {
2719              if ((start_bits[i/8] & (1<<(i%8))) != 0)              int i;
2720                int c = 24;
2721                fprintf(outfile, "Starting byte set: ");
2722                for (i = 0; i < 256; i++)
2723                {                {
2724                if (c > 75)                if ((start_bits[i/8] & (1<<(i&7))) != 0)
                 {  
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
2725                  {                  {
2726                  fprintf(outfile, "%c ", i);                  if (c > 75)
2727                  c += 2;                    {
2728                  }                    fprintf(outfile, "\n  ");
2729                else                    c = 2;
2730                  {                    }
2731                  fprintf(outfile, "\\x%02x ", i);                  if (PRINTOK(i) && i != ' ')
2732                  c += 5;                    {
2733                      fprintf(outfile, "%c ", i);
2734                      c += 2;
2735                      }
2736                    else
2737                      {
2738                      fprintf(outfile, "\\x%02x ", i);
2739                      c += 5;
2740                      }
2741                  }                  }
2742                }                }
2743                fprintf(outfile, "\n");
2744              }              }
2745            fprintf(outfile, "\n");            }
2746    
2747            /* Show this only if the JIT was set by /S, not by -s. */
2748    
2749            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2750              {
2751              int jit;
2752              new_info(re, extra, PCRE_INFO_JIT, &jit);
2753              if (jit)
2754                fprintf(outfile, "JIT study was successful\n");
2755              else
2756    #ifdef SUPPORT_JIT
2757                fprintf(outfile, "JIT study was not successful\n");
2758    #else
2759                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2760    #endif
2761            }            }
2762          }          }
2763        }        }
2764      }  
2765        /* If the '>' option was present, we write out the regex to a file, and
2766        that is all. The first 8 bytes of the file are the regex length and then
2767        the study length, in big-endian order. */
2768    
2769        if (to_file != NULL)
2770          {
2771          FILE *f = fopen((char *)to_file, "wb");
2772          if (f == NULL)
2773            {
2774            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2775            }
2776          else
2777            {
2778            pcre_uint8 sbuf[8];
2779    
2780            if (do_flip) regexflip(re, extra);
2781            sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2782            sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2783            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2784            sbuf[3] = (pcre_uint8)((true_size) & 255);
2785            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2786            sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2787            sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2788            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2789    
2790            if (fwrite(sbuf, 1, 8, f) < 8 ||
2791                fwrite(re, 1, true_size, f) < true_size)
2792              {
2793              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2794              }
2795            else
2796              {
2797              fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2798    
2799              /* If there is study data, write it. */
2800    
2801              if (extra != NULL)
2802                {
2803                if (fwrite(extra->study_data, 1, true_study_size, f) <
2804                    true_study_size)
2805                  {
2806                  fprintf(outfile, "Write error on %s: %s\n", to_file,
2807                    strerror(errno));
2808                  }
2809                else fprintf(outfile, "Study data written to %s\n", to_file);
2810                }
2811              }
2812            fclose(f);
2813            }
2814    
2815          new_free(re);
2816          if (extra != NULL)
2817            {
2818            PCRE_FREE_STUDY(extra);
2819            }
2820          if (locale_set)
2821            {
2822            new_free((void *)tables);
2823            setlocale(LC_CTYPE, "C");
2824            locale_set = 0;
2825            }
2826          continue;  /* With next regex */
2827          }
2828        }        /* End of non-POSIX compile */
2829    
2830    /* Read data lines and test them */    /* Read data lines and test them */
2831    
2832    for (;;)    for (;;)
2833      {      {
2834      unsigned char *q;      pcre_uint8 *q;
2835      unsigned char *bptr = dbuffer;      pcre_uint8 *bptr;
2836      int *use_offsets = offsets;      int *use_offsets = offsets;
2837      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2838      int callout_data = 0;      int callout_data = 0;
2839      int callout_data_set = 0;      int callout_data_set = 0;
2840      int count, c;      int count, c;
2841      int copystrings = 0;      int copystrings = 0;
2842      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2843      int getstrings = 0;      int getstrings = 0;
2844      int getlist = 0;      int getlist = 0;
2845      int gmatched = 0;      int gmatched = 0;
2846      int start_offset = 0;      int start_offset = 0;
2847        int start_offset_sign = 1;
2848      int g_notempty = 0;      int g_notempty = 0;
2849        int use_dfa = 0;
2850    
2851      options = 0;      options = 0;
2852    
2853        *copynames = 0;
2854        *getnames = 0;
2855    
2856        copynamesptr = copynames;
2857        getnamesptr = getnames;
2858    
2859      pcre_callout = callout;      pcre_callout = callout;
2860      first_callout = 1;      first_callout = 1;
2861        last_callout_mark = NULL;
2862      callout_extra = 0;      callout_extra = 0;
2863      callout_count = 0;      callout_count = 0;
2864      callout_fail_count = 999999;      callout_fail_count = 999999;
2865      callout_fail_id = -1;      callout_fail_id = -1;
2866      show_malloc = 0;      show_malloc = 0;
2867    
2868      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
2869      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2870    
2871        len = 0;
2872        for (;;)
2873        {        {
2874        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2875        goto CONTINUE;          {
2876            if (len > 0)    /* Reached EOF without hitting a newline */
2877              {
2878              fprintf(outfile, "\n");
2879              break;
2880              }
2881            done = 1;
2882            goto CONTINUE;
2883            }
2884          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2885          len = (int)strlen((char *)buffer);
2886          if (buffer[len-1] == '\n') break;
2887        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2888    
     len = (int)strlen((char *)buffer);  
2889      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2890      buffer[len] = 0;      buffer[len] = 0;
2891      if (len == 0) break;      if (len == 0) break;
# Line 988  while (!done) Line 2893  while (!done)
2893      p = buffer;      p = buffer;
2894      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2895    
2896      q = dbuffer;      bptr = q = dbuffer;
2897      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2898        {        {
2899        int i = 0;        int i = 0;
# Line 1010  while (!done) Line 2915  while (!done)
2915          c -= '0';          c -= '0';
2916          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2917            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2918    
2919    #if !defined NOUTF8
2920            if (use_utf && c > 255)
2921              {
2922              pcre_uint8 buff8[8];
2923              int ii, utn;
2924              utn = ord2utf8(c, buff8);
2925              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2926              c = buff8[ii];   /* Last byte */
2927              }
2928    #endif
2929          break;          break;
2930    
2931          case 'x':          case 'x':
2932    
2933          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
2934    
2935    #if !defined NOUTF8
2936          if (*p == '{')          if (*p == '{')
2937            {            {
2938            unsigned char *pt = p;            pcre_uint8 *pt = p;
2939            c = 0;            c = 0;
2940            while (isxdigit(*(++pt)))  
2941              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2942              when isxdigit() is a macro that refers to its argument more than
2943              once. This is banned by the C Standard, but apparently happens in at
2944              least one MacOS environment. */
2945    
2946              for (pt++; isxdigit(*pt); pt++)
2947                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2948            if (*pt == '}')            if (*pt == '}')
2949              {              {
2950              unsigned char buff8[8];              pcre_uint8 buff8[8];
2951              int ii, utn;              int ii, utn;
2952              utn = ord2utf8(c, buff8);              if (use_utf)
2953              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2954              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2955                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2956                  c = buff8[ii];   /* Last byte */
2957                  }
2958                else
2959                 {
2960                 if (c > 255)
2961                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2962                     "UTF-8 mode is not enabled.\n"
2963                     "** Truncation will probably give the wrong result.\n", c);
2964                 }
2965              p = pt + 1;              p = pt + 1;
2966              break;              break;
2967              }              }
2968            /* Not correct form; fall through */            /* Not correct form; fall through */
2969            }            }
2970    #endif
2971    
2972          /* Ordinary \x */          /* Ordinary \x */
2973    
2974          c = 0;          c = 0;
2975          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2976            {            {
2977            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2978            p++;            p++;
2979            }            }
2980          break;          break;
2981    
2982          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
2983          p--;          p--;
2984          continue;          continue;
2985    
2986            case '>':
2987            if (*p == '-')
2988              {
2989              start_offset_sign = -1;
2990              p++;
2991              }
2992            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2993            start_offset *= start_offset_sign;
2994            continue;
2995    
2996          case 'A':  /* Option setting */          case 'A':  /* Option setting */
2997          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
2998          continue;          continue;
# Line 1065  while (!done) Line 3009  while (!done)
3009            }            }
3010          else if (isalnum(*p))          else if (isalnum(*p))
3011            {            {
3012            uschar name[256];            pcre_uchar *npp = copynamesptr;
           uschar *npp = name;  
3013            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
3014              *npp++ = 0;
3015            *npp = 0;            *npp = 0;
3016            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
3017            if (n < 0)            if (n < 0)
3018              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
3019            else copystrings |= 1 << n;            copynamesptr = npp;
3020            }            }
3021          else if (*p == '+')          else if (*p == '+')
3022            {            {
# Line 1110  while (!done) Line 3054  while (!done)
3054            }            }
3055          continue;          continue;
3056    
3057    #if !defined NODFA
3058            case 'D':
3059    #if !defined NOPOSIX
3060            if (posix || do_posix)
3061              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3062            else
3063    #endif
3064              use_dfa = 1;
3065            continue;
3066    #endif
3067    
3068    #if !defined NODFA
3069            case 'F':
3070            options |= PCRE_DFA_SHORTEST;
3071            continue;
3072    #endif
3073    
3074          case 'G':          case 'G':
3075          if (isdigit(*p))          if (isdigit(*p))
3076            {            {
# Line 1118  while (!done) Line 3079  while (!done)
3079            }            }
3080          else if (isalnum(*p))          else if (isalnum(*p))
3081            {            {
3082            uschar name[256];            pcre_uchar *npp = getnamesptr;
           uschar *npp = name;  
3083            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
3084              *npp++ = 0;
3085            *npp = 0;            *npp = 0;
3086            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
3087            if (n < 0)            if (n < 0)
3088              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
3089            else getstrings |= 1 << n;            getnamesptr = npp;
3090              }
3091            continue;
3092    
3093            case 'J':
3094            while(isdigit(*p)) n = n * 10 + *p++ - '0';
3095            if (extra != NULL
3096                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3097                && extra->executable_jit != NULL)
3098              {
3099              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3100              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
3101              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
3102            }            }
3103          continue;          continue;
3104    
# Line 1138  while (!done) Line 3111  while (!done)
3111          continue;          continue;
3112    
3113          case 'N':          case 'N':
3114          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3115              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3116            else
3117              options |= PCRE_NOTEMPTY;
3118          continue;          continue;
3119    
3120          case 'O':          case 'O':
# Line 1151  while (!done) Line 3127  while (!done)
3127            if (offsets == NULL)            if (offsets == NULL)
3128              {              {
3129              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
3130                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
3131              return 1;              yield = 1;
3132                goto EXIT;
3133              }              }
3134            }            }
3135          use_size_offsets = n;          use_size_offsets = n;
3136          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
3137          continue;          continue;
3138    
3139            case 'P':
3140            options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3141              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3142            continue;
3143    
3144            case 'Q':
3145            while(isdigit(*p)) n = n * 10 + *p++ - '0';
3146            if (extra == NULL)
3147              {
3148              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3149              extra->flags = 0;
3150              }
3151            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3152            extra->match_limit_recursion = n;
3153            continue;
3154    
3155            case 'q':
3156            while(isdigit(*p)) n = n * 10 + *p++ - '0';
3157            if (extra == NULL)
3158              {
3159              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3160              extra->flags = 0;
3161              }
3162            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3163            extra->match_limit = n;
3164            continue;
3165    
3166    #if !defined NODFA
3167            case 'R':
3168            options |= PCRE_DFA_RESTART;
3169            continue;
3170    #endif
3171    
3172          case 'S':          case 'S':
3173          show_malloc = 1;          show_malloc = 1;
3174          continue;          continue;
3175    
3176            case 'Y':
3177            options |= PCRE_NO_START_OPTIMIZE;
3178            continue;
3179    
3180          case 'Z':          case 'Z':
3181          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
3182          continue;          continue;
# Line 1170  while (!done) Line 3184  while (!done)
3184          case '?':          case '?':
3185          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
3186          continue;          continue;
3187    
3188            case '<':
3189              {
3190              int x = check_newline(p, outfile);
3191              if (x == 0) goto NEXT_DATA;
3192              options |= x;
3193              while (*p++ != '>');
3194              }
3195            continue;
3196          }          }
3197        *q++ = c;        *q++ = c;
3198        }        }
3199      *q = 0;      *q = 0;
3200      len = q - dbuffer;      len = (int)(q - dbuffer);
3201    
3202        /* Move the data to the end of the buffer so that a read over the end of
3203        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3204        we are using the POSIX interface, we must include the terminating zero. */
3205    
3206    #if !defined NOPOSIX
3207        if (posix || do_posix)
3208          {
3209          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3210          bptr += buffer_size - len - 1;
3211          }
3212        else
3213    #endif
3214          {
3215          memmove(bptr + buffer_size - len, bptr, len);
3216          bptr += buffer_size - len;
3217          }
3218    
3219        if ((all_use_dfa || use_dfa) && find_match_limit)
3220          {
3221          printf("**Match limit not relevant for DFA matching: ignored\n");
3222          find_match_limit = 0;
3223          }
3224    
3225      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
3226      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
# Line 1189  while (!done) Line 3235  while (!done)
3235          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3236        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3237        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3238          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3239    
3240        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3241    
3242        if (rc != 0)        if (rc != 0)
3243          {          {
3244          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3245          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3246          }          }
3247          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3248                  != 0)
3249            {
3250            fprintf(outfile, "Matched with REG_NOSUB\n");
3251            }
3252        else        else
3253          {          {
3254          size_t i;          size_t i;
# Line 1205  while (!done) Line 3257  while (!done)
3257            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3258              {              {
3259              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3260              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer + pmatch[i].rm_so,
3261                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3262              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3263              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3264                {                {
3265                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3266                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3267                  outfile);                  outfile);
3268                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3269                }                }
# Line 1219  while (!done) Line 3271  while (!done)
3271            }            }
3272          }          }
3273        free(pmatch);        free(pmatch);
3274          goto NEXT_DATA;
3275        }        }
3276    
3277    #endif  /* !defined NOPOSIX */
3278    
3279      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3280    
3281      else  #ifdef SUPPORT_PCRE16
3282  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3283          {
3284          len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3285          if (len < 0)
3286            {
3287            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3288              "converted to UTF-16\n");
3289            goto NEXT_DATA;
3290            }
3291          bptr = (pcre_uint8 *)buffer16;
3292          }
3293    #endif
3294    
3295      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3296        {        {
3297        if (timeit)        markptr = NULL;
3298    
3299          if (timeitm > 0)
3300          {          {
3301          register int i;          register int i;
3302          clock_t time_taken;          clock_t time_taken;
3303          clock_t start_time = clock();          clock_t start_time = clock();
3304          for (i = 0; i < LOOPREPEAT; i++)  
3305            count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
3306            if (all_use_dfa || use_dfa)
3307              {
3308              int workspace[1000];
3309              for (i = 0; i < timeitm; i++)
3310                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3311                  options | g_notempty, use_offsets, use_size_offsets, workspace,
3312                  sizeof(workspace)/sizeof(int));
3313              }
3314            else
3315    #endif
3316    
3317            for (i = 0; i < timeitm; i++)
3318              {
3319              PCRE_EXEC(count, re, extra, bptr, len,
3320              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
3321              }
3322          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3323          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3324            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
3325              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3326          }          }
3327    
3328        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3329        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
3330          for the recursion limit. The match limits are relevant only to the normal
3331          running of pcre_exec(), so disable the JIT optimization. This makes it
3332          possible to run the same set of tests with and without JIT externally
3333          requested. */
3334    
3335        if (find_match_limit)        if (find_match_limit)
3336          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
3337          if (extra == NULL)          if (extra == NULL)
3338            {            {
3339            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3340            extra->flags = 0;            extra->flags = 0;
3341            }            }
3342          extra->flags |= PCRE_EXTRA_MATCH_LIMIT;          else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
3343    
3344          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
3345              options|g_notempty, use_offsets, use_size_offsets,
3346              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3347              PCRE_ERROR_MATCHLIMIT, "match()");
3348    
3349            count = check_match_limit(re, extra, bptr, len, start_offset,
3350              options|g_notempty, use_offsets, use_size_offsets,
3351              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3352              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3353          }          }
3354    
3355        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1297  while (!done) Line 3363  while (!done)
3363            }            }
3364          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3365          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3366          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3367            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3368          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3369          }          }
# Line 1305  while (!done) Line 3371  while (!done)
3371        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
3372        value of match_limit. */        value of match_limit. */
3373    
3374        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
3375          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
3376            {
3377            int workspace[1000];
3378            count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3379              options | g_notempty, use_offsets, use_size_offsets, workspace,
3380              sizeof(workspace)/sizeof(int));
3381            if (count == 0)
3382              {
3383              fprintf(outfile, "Matched, but too many subsidiary matches\n");
3384              count = use_size_offsets/2;
3385              }
3386            }
3387    #endif
3388    
3389        if (count == 0)        else
3390          {          {
3391          fprintf(outfile, "Matched, but too many substrings\n");          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3392          count = use_size_offsets/3;            options | g_notempty, use_offsets, use_size_offsets);
3393            if (count == 0)
3394              {
3395              fprintf(outfile, "Matched, but too many substrings\n");
3396              count = use_size_offsets/3;
3397              }
3398          }          }
3399    
3400        /* Matched */        /* Matched */
3401    
3402        if (count >= 0)        if (count >= 0)
3403          {          {
3404          int i;          int i, maxcount;
3405    
3406    #if !defined NODFA
3407            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3408    #endif
3409              maxcount = use_size_offsets/3;
3410    
3411            /* This is a check against a lunatic return value. */
3412    
3413            if (count > maxcount)
3414              {
3415              fprintf(outfile,
3416                "** PCRE error: returned count %d is too big for offset size %d\n",
3417                count, use_size_offsets);
3418              count = use_size_offsets/3;
3419              if (do_g || do_G)
3420                {
3421                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3422                do_g = do_G = FALSE;        /* Break g/G loop */
3423                }
3424              }
3425    
3426            /* do_allcaps requests showing of all captures in the pattern, to check
3427            unset ones at the end. */
3428    
3429            if (do_allcaps)
3430              {
3431              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3432              count++;   /* Allow for full match */
3433              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3434              }
3435    
3436            /* Output the captured substrings */
3437    
3438          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
3439            {            {
3440            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
3441                {
3442                if (use_offsets[i] != -1)
3443                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3444                    use_offsets[i], i);
3445                if (use_offsets[i+1] != -1)
3446                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3447                    use_offsets[i+1], i+1);
3448              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3449                }
3450            else            else
3451              {              {
3452</