/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 773 by ph10, Wed Nov 30 18:10:27 2011 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90  /* Use the internal info for displaying the results of pcre_study(). */  #else
91    #include <sys/time.h>          /* These two includes are needed */
92    #include <sys/resource.h>      /* for setrlimit(). */
93    #define INPUT_MODE   "rb"
94    #define OUTPUT_MODE  "wb"
95    #endif
96    
97    
98    /* We have to include pcre_internal.h because we need the internal info for
99    displaying the results of pcre_study() and we also need to know about the
100    internal macros, structures, and other internal data values; pcretest has
101    "inside information" compared to a program that strictly follows the PCRE API.
102    
103    Although pcre_internal.h does itself include pcre.h, we explicitly include it
104    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105    appropriately for an application, not for building PCRE. */
106    
107    #include "pcre.h"
108    #include "pcre_internal.h"
109    
110    /* We need access to some of the data tables that PCRE uses. So as not to have
111    to keep two copies, we include the source file here, changing the names of the
112    external symbols to prevent clashes. */
113    
114    #define _pcre_ucp_gentype      ucp_gentype
115    #define _pcre_ucp_typerange    ucp_typerange
116    #define _pcre_utf8_table1      utf8_table1
117    #define _pcre_utf8_table1_size utf8_table1_size
118    #define _pcre_utf8_table2      utf8_table2
119    #define _pcre_utf8_table3      utf8_table3
120    #define _pcre_utf8_table4      utf8_table4
121    #define _pcre_utf8_char_sizes  utf8_char_sizes
122    #define _pcre_utt              utt
123    #define _pcre_utt_size         utt_size
124    #define _pcre_utt_names        utt_names
125    #define _pcre_OP_lengths       OP_lengths
126    
127    #include "pcre_tables.c"
128    
129    /* We also need the pcre_printint() function for printing out compiled
130    patterns. This function is in a separate file so that it can be included in
131    pcre_compile.c when that module is compiled with debugging enabled. It needs to
132    know which case is being compiled. */
133    
134    #define COMPILING_PCRETEST
135    #include "pcre_printint.src"
136    
137    /* The definition of the macro PRINTABLE, which determines whether to print an
138    output character as-is or as a hex value when showing compiled patterns, is
139    contained in the printint.src file. We use it here also, in cases when the
140    locale has not been explicitly changed, so as to get consistent output from
141    systems that differ in their output from isprint() even in the "C" locale. */
142    
143  #include "internal.h"  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144    
145  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
146  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 150  Makefile. */
150  #include "pcreposix.h"  #include "pcreposix.h"
151  #endif  #endif
152    
153    /* It is also possible, for the benefit of the version currently imported into
154    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155    interface to the DFA matcher (NODFA), and without the doublecheck of the old
156    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157    UTF8 support if PCRE is built without it. */
158    
159    #ifndef SUPPORT_UTF8
160    #ifndef NOUTF8
161    #define NOUTF8
162    #endif
163    #endif
164    
165    
166    /* Other parameters */
167    
168  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
169  #ifdef CLK_TCK  #ifdef CLK_TCK
170  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 173  Makefile. */
173  #endif  #endif
174  #endif  #endif
175    
176  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
177    
178    #define LOOPREPEAT 500000
179    
180    /* Static variables */
181    
182  static FILE *outfile;  static FILE *outfile;
183  static int log_store = 0;  static int log_store = 0;
184    static int callout_count;
185    static int callout_extra;
186    static int callout_fail_count;
187    static int callout_fail_id;
188    static int debug_lengths;
189    static int first_callout;
190    static int locale_set = 0;
191    static int show_malloc;
192    static int use_utf8;
193  static size_t gotten_store;  static size_t gotten_store;
194    static size_t first_gotten_store = 0;
195    static const unsigned char *last_callout_mark = NULL;
196    
197    /* The buffers grow automatically if very long input lines are encountered. */
198    
199    static int buffer_size = 50000;
200    static uschar *buffer = NULL;
201    static uschar *dbuffer = NULL;
202    static uschar *pbuffer = NULL;
203    
204    /* Textual explanations for runtime error codes */
205    
206    static const char *errtexts[] = {
207      NULL,  /* 0 is no error */
208      NULL,  /* NOMATCH is handled specially */
209      "NULL argument passed",
210      "bad option value",
211      "magic number missing",
212      "unknown opcode - pattern overwritten?",
213      "no more memory",
214      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
215      "match limit exceeded",
216      "callout error code",
217      NULL,  /* BADUTF8 is handled specially */
218      "bad UTF-8 offset",
219      NULL,  /* PARTIAL is handled specially */
220      "not used - internal error",
221      "internal error - pattern overwritten?",
222      "bad count value",
223      "item unsupported for DFA matching",
224      "backreference condition or recursion test not supported for DFA matching",
225      "match limit not supported for DFA matching",
226      "workspace size exceeded in DFA matching",
227      "too much recursion for DFA matching",
228      "recursion limit exceeded",
229      "not used - internal error",
230      "invalid combination of newline options",
231      "bad offset value",
232      NULL,  /* SHORTUTF8 is handled specially */
233      "nested recursion at the same subject position",
234      "JIT stack limit reached"
235    };
236    
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
237    
238  static const char *OP_names[] = {  /*************************************************
239    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  *         Alternate character tables             *
240    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  *************************************************/
241    "Opt", "^", "$", "Any", "chars", "not",  
242    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
243    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  using the default tables of the library. However, the T option can be used to
244    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  select alternate sets of tables, for different kinds of testing. Note also that
245    "*", "*?", "+", "+?", "?", "??", "{", "{",  the L (locale) option also adjusts the tables. */
246    "class", "Ref", "Recurse",  
247    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  /* This is the set of tables distributed as default with PCRE. It recognizes
248    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  only ASCII characters. */
249    "Brazero", "Braminzero", "Bra"  
250    static const unsigned char tables0[] = {
251    
252    /* This table is a lower casing table. */
253    
254        0,  1,  2,  3,  4,  5,  6,  7,
255        8,  9, 10, 11, 12, 13, 14, 15,
256       16, 17, 18, 19, 20, 21, 22, 23,
257       24, 25, 26, 27, 28, 29, 30, 31,
258       32, 33, 34, 35, 36, 37, 38, 39,
259       40, 41, 42, 43, 44, 45, 46, 47,
260       48, 49, 50, 51, 52, 53, 54, 55,
261       56, 57, 58, 59, 60, 61, 62, 63,
262       64, 97, 98, 99,100,101,102,103,
263      104,105,106,107,108,109,110,111,
264      112,113,114,115,116,117,118,119,
265      120,121,122, 91, 92, 93, 94, 95,
266       96, 97, 98, 99,100,101,102,103,
267      104,105,106,107,108,109,110,111,
268      112,113,114,115,116,117,118,119,
269      120,121,122,123,124,125,126,127,
270      128,129,130,131,132,133,134,135,
271      136,137,138,139,140,141,142,143,
272      144,145,146,147,148,149,150,151,
273      152,153,154,155,156,157,158,159,
274      160,161,162,163,164,165,166,167,
275      168,169,170,171,172,173,174,175,
276      176,177,178,179,180,181,182,183,
277      184,185,186,187,188,189,190,191,
278      192,193,194,195,196,197,198,199,
279      200,201,202,203,204,205,206,207,
280      208,209,210,211,212,213,214,215,
281      216,217,218,219,220,221,222,223,
282      224,225,226,227,228,229,230,231,
283      232,233,234,235,236,237,238,239,
284      240,241,242,243,244,245,246,247,
285      248,249,250,251,252,253,254,255,
286    
287    /* This table is a case flipping table. */
288    
289        0,  1,  2,  3,  4,  5,  6,  7,
290        8,  9, 10, 11, 12, 13, 14, 15,
291       16, 17, 18, 19, 20, 21, 22, 23,
292       24, 25, 26, 27, 28, 29, 30, 31,
293       32, 33, 34, 35, 36, 37, 38, 39,
294       40, 41, 42, 43, 44, 45, 46, 47,
295       48, 49, 50, 51, 52, 53, 54, 55,
296       56, 57, 58, 59, 60, 61, 62, 63,
297       64, 97, 98, 99,100,101,102,103,
298      104,105,106,107,108,109,110,111,
299      112,113,114,115,116,117,118,119,
300      120,121,122, 91, 92, 93, 94, 95,
301       96, 65, 66, 67, 68, 69, 70, 71,
302       72, 73, 74, 75, 76, 77, 78, 79,
303       80, 81, 82, 83, 84, 85, 86, 87,
304       88, 89, 90,123,124,125,126,127,
305      128,129,130,131,132,133,134,135,
306      136,137,138,139,140,141,142,143,
307      144,145,146,147,148,149,150,151,
308      152,153,154,155,156,157,158,159,
309      160,161,162,163,164,165,166,167,
310      168,169,170,171,172,173,174,175,
311      176,177,178,179,180,181,182,183,
312      184,185,186,187,188,189,190,191,
313      192,193,194,195,196,197,198,199,
314      200,201,202,203,204,205,206,207,
315      208,209,210,211,212,213,214,215,
316      216,217,218,219,220,221,222,223,
317      224,225,226,227,228,229,230,231,
318      232,233,234,235,236,237,238,239,
319      240,241,242,243,244,245,246,247,
320      248,249,250,251,252,253,254,255,
321    
322    /* This table contains bit maps for various character classes. Each map is 32
323    bytes long and the bits run from the least significant end of each byte. The
324    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
325    graph, print, punct, and cntrl. Other classes are built from combinations. */
326    
327      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
333      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
334      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
336    
337      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
338      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341    
342      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346    
347      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
349      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
351    
352      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
353      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
354      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356    
357      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
358      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
361    
362      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
363      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
366    
367      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
368      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
371    
372      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
373      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
374      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
376    
377    /* This table identifies various classes of character by individual bits:
378      0x01   white space character
379      0x02   letter
380      0x04   decimal digit
381      0x08   hexadecimal digit
382      0x10   alphanumeric or '_'
383      0x80   regular expression metacharacter or binary zero
384    */
385    
386      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
387      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
388      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
389      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
390      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
391      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
392      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
393      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
394      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
395      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
396      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
397      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
398      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
399      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
400      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
401      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
402      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
403      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
404      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
405      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
406      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
407      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
408      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
409      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
410      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
411      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
412      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
413      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
414      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
415      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
416      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
417      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
418    
419    /* This is a set of tables that came originally from a Windows user. It seems to
420    be at least an approximation of ISO 8859. In particular, there are characters
421    greater than 128 that are marked as spaces, letters, etc. */
422    
423    static const unsigned char tables1[] = {
424    0,1,2,3,4,5,6,7,
425    8,9,10,11,12,13,14,15,
426    16,17,18,19,20,21,22,23,
427    24,25,26,27,28,29,30,31,
428    32,33,34,35,36,37,38,39,
429    40,41,42,43,44,45,46,47,
430    48,49,50,51,52,53,54,55,
431    56,57,58,59,60,61,62,63,
432    64,97,98,99,100,101,102,103,
433    104,105,106,107,108,109,110,111,
434    112,113,114,115,116,117,118,119,
435    120,121,122,91,92,93,94,95,
436    96,97,98,99,100,101,102,103,
437    104,105,106,107,108,109,110,111,
438    112,113,114,115,116,117,118,119,
439    120,121,122,123,124,125,126,127,
440    128,129,130,131,132,133,134,135,
441    136,137,138,139,140,141,142,143,
442    144,145,146,147,148,149,150,151,
443    152,153,154,155,156,157,158,159,
444    160,161,162,163,164,165,166,167,
445    168,169,170,171,172,173,174,175,
446    176,177,178,179,180,181,182,183,
447    184,185,186,187,188,189,190,191,
448    224,225,226,227,228,229,230,231,
449    232,233,234,235,236,237,238,239,
450    240,241,242,243,244,245,246,215,
451    248,249,250,251,252,253,254,223,
452    224,225,226,227,228,229,230,231,
453    232,233,234,235,236,237,238,239,
454    240,241,242,243,244,245,246,247,
455    248,249,250,251,252,253,254,255,
456    0,1,2,3,4,5,6,7,
457    8,9,10,11,12,13,14,15,
458    16,17,18,19,20,21,22,23,
459    24,25,26,27,28,29,30,31,
460    32,33,34,35,36,37,38,39,
461    40,41,42,43,44,45,46,47,
462    48,49,50,51,52,53,54,55,
463    56,57,58,59,60,61,62,63,
464    64,97,98,99,100,101,102,103,
465    104,105,106,107,108,109,110,111,
466    112,113,114,115,116,117,118,119,
467    120,121,122,91,92,93,94,95,
468    96,65,66,67,68,69,70,71,
469    72,73,74,75,76,77,78,79,
470    80,81,82,83,84,85,86,87,
471    88,89,90,123,124,125,126,127,
472    128,129,130,131,132,133,134,135,
473    136,137,138,139,140,141,142,143,
474    144,145,146,147,148,149,150,151,
475    152,153,154,155,156,157,158,159,
476    160,161,162,163,164,165,166,167,
477    168,169,170,171,172,173,174,175,
478    176,177,178,179,180,181,182,183,
479    184,185,186,187,188,189,190,191,
480    224,225,226,227,228,229,230,231,
481    232,233,234,235,236,237,238,239,
482    240,241,242,243,244,245,246,215,
483    248,249,250,251,252,253,254,223,
484    192,193,194,195,196,197,198,199,
485    200,201,202,203,204,205,206,207,
486    208,209,210,211,212,213,214,247,
487    216,217,218,219,220,221,222,255,
488    0,62,0,0,1,0,0,0,
489    0,0,0,0,0,0,0,0,
490    32,0,0,0,1,0,0,0,
491    0,0,0,0,0,0,0,0,
492    0,0,0,0,0,0,255,3,
493    126,0,0,0,126,0,0,0,
494    0,0,0,0,0,0,0,0,
495    0,0,0,0,0,0,0,0,
496    0,0,0,0,0,0,255,3,
497    0,0,0,0,0,0,0,0,
498    0,0,0,0,0,0,12,2,
499    0,0,0,0,0,0,0,0,
500    0,0,0,0,0,0,0,0,
501    254,255,255,7,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    255,255,127,127,0,0,0,0,
504    0,0,0,0,0,0,0,0,
505    0,0,0,0,254,255,255,7,
506    0,0,0,0,0,4,32,4,
507    0,0,0,128,255,255,127,255,
508    0,0,0,0,0,0,255,3,
509    254,255,255,135,254,255,255,7,
510    0,0,0,0,0,4,44,6,
511    255,255,127,255,255,255,127,255,
512    0,0,0,0,254,255,255,255,
513    255,255,255,255,255,255,255,127,
514    0,0,0,0,254,255,255,255,
515    255,255,255,255,255,255,255,255,
516    0,2,0,0,255,255,255,255,
517    255,255,255,255,255,255,255,127,
518    0,0,0,0,255,255,255,255,
519    255,255,255,255,255,255,255,255,
520    0,0,0,0,254,255,0,252,
521    1,0,0,248,1,0,0,120,
522    0,0,0,0,254,255,255,255,
523    0,0,128,0,0,0,128,0,
524    255,255,255,255,0,0,0,0,
525    0,0,0,0,0,0,0,128,
526    255,255,255,255,0,0,0,0,
527    0,0,0,0,0,0,0,0,
528    128,0,0,0,0,0,0,0,
529    0,1,1,0,1,1,0,0,
530    0,0,0,0,0,0,0,0,
531    0,0,0,0,0,0,0,0,
532    1,0,0,0,128,0,0,0,
533    128,128,128,128,0,0,128,0,
534    28,28,28,28,28,28,28,28,
535    28,28,0,0,0,0,0,128,
536    0,26,26,26,26,26,26,18,
537    18,18,18,18,18,18,18,18,
538    18,18,18,18,18,18,18,18,
539    18,18,18,128,128,0,128,16,
540    0,26,26,26,26,26,26,18,
541    18,18,18,18,18,18,18,18,
542    18,18,18,18,18,18,18,18,
543    18,18,18,128,128,0,0,0,
544    0,0,0,0,0,1,0,0,
545    0,0,0,0,0,0,0,0,
546    0,0,0,0,0,0,0,0,
547    0,0,0,0,0,0,0,0,
548    1,0,0,0,0,0,0,0,
549    0,0,18,0,0,0,0,0,
550    0,0,20,20,0,18,0,0,
551    0,20,18,0,0,0,0,0,
552    18,18,18,18,18,18,18,18,
553    18,18,18,18,18,18,18,18,
554    18,18,18,18,18,18,18,0,
555    18,18,18,18,18,18,18,18,
556    18,18,18,18,18,18,18,18,
557    18,18,18,18,18,18,18,18,
558    18,18,18,18,18,18,18,0,
559    18,18,18,18,18,18,18,18
560  };  };
561    
562    
563  static void print_internals(pcre *re)  
564    
565    #ifndef HAVE_STRERROR
566    /*************************************************
567    *     Provide strerror() for non-ANSI libraries  *
568    *************************************************/
569    
570    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
571    in their libraries, but can provide the same facility by this simple
572    alternative function. */
573    
574    extern int   sys_nerr;
575    extern char *sys_errlist[];
576    
577    char *
578    strerror(int n)
579  {  {
580  unsigned char *code = ((real_pcre *)re)->code;  if (n < 0 || n >= sys_nerr) return "unknown error number";
581    return sys_errlist[n];
582    }
583    #endif /* HAVE_STRERROR */
584    
 fprintf(outfile, "------------------------------------------------------------------\n");  
585    
586  for(;;)  /*************************************************
587    {  *         JIT memory callback                    *
588    int c;  *************************************************/
589    int charlength;  
590    static pcre_jit_stack* jit_callback(void *arg)
591    {
592    return (pcre_jit_stack *)arg;
593    }
594    
595    
596    /*************************************************
597    *        Read or extend an input line            *
598    *************************************************/
599    
600    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  /* Input lines are read into buffer, but both patterns and data lines can be
601    continued over multiple input lines. In addition, if the buffer fills up, we
602    want to automatically expand it so as to be able to handle extremely large
603    lines that are needed for certain stress tests. When the input buffer is
604    expanded, the other two buffers must also be expanded likewise, and the
605    contents of pbuffer, which are a copy of the input for callouts, must be
606    preserved (for when expansion happens for a data line). This is not the most
607    optimal way of handling this, but hey, this is just a test program!
608    
609    Arguments:
610      f            the file to read
611      start        where in buffer to start (this *must* be within buffer)
612      prompt       for stdin or readline()
613    
614    Returns:       pointer to the start of new data
615                   could be a copy of start, or could be moved
616                   NULL if no data read and EOF reached
617    */
618    
619    if (*code >= OP_BRA)  static uschar *
620    extend_inputline(FILE *f, uschar *start, const char *prompt)
621    {
622    uschar *here = start;
623    
624    for (;;)
625      {
626      int rlen = (int)(buffer_size - (here - buffer));
627    
628      if (rlen > 1000)
629      {      {
630      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      int dlen;
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
631    
632        CLASS_REF_REPEAT:      /* If libreadline support is required, use readline() to read a line if the
633        input is a terminal. Note that readline() removes the trailing newline, so
634        we must put it back again, to be compatible with fgets(). */
635    
636        switch(*code)  #ifdef SUPPORT_LIBREADLINE
637          {      if (isatty(fileno(f)))
638          case OP_CRSTAR:        {
639          case OP_CRMINSTAR:        size_t len;
640          case OP_CRPLUS:        char *s = readline(prompt);
641          case OP_CRMINPLUS:        if (s == NULL) return (here == start)? NULL : start;
642          case OP_CRQUERY:        len = strlen(s);
643          case OP_CRMINQUERY:        if (len > 0) add_history(s);
644          fprintf(outfile, "%s", OP_names[*code]);        if (len > rlen - 1) len = rlen - 1;
645          break;        memcpy(here, s, len);
646          here[len] = '\n';
647          here[len+1] = 0;
648          free(s);
649          }
650        else
651    #endif
652    
653          case OP_CRRANGE:      /* Read the next line by normal means, prompting if the file is stdin. */
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
654    
655          default:        {
656          code--;        if (f == stdin) printf("%s", prompt);
657          }        if (fgets((char *)here, rlen,  f) == NULL)
658            return (here == start)? NULL : start;
659          }
660    
661        dlen = (int)strlen((char *)here);
662        if (dlen > 0 && here[dlen - 1] == '\n') return start;
663        here += dlen;
664        }
665    
666      else
667        {
668        int new_buffer_size = 2*buffer_size;
669        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
670        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
671        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
672    
673        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
674          {
675          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
676          exit(1);
677        }        }
     break;  
678    
679      /* Anything else is just a one-node item */      memcpy(new_buffer, buffer, buffer_size);
680        memcpy(new_pbuffer, pbuffer, buffer_size);
681    
682        buffer_size = new_buffer_size;
683    
684        start = new_buffer + (start - buffer);
685        here = new_buffer + (here - buffer);
686    
687      default:      free(buffer);
688      fprintf(outfile, "    %s", OP_names[*code]);      free(dbuffer);
689      break;      free(pbuffer);
690    
691        buffer = new_buffer;
692        dbuffer = new_dbuffer;
693        pbuffer = new_pbuffer;
694      }      }
695      }
696    
697    return NULL;  /* Control never gets here */
698    }
699    
700    
701    
702    
703    
704    
705    
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple. Leading white space is
skipped, then decimal digits are accumulated. There is no sign handling and no
overflow check; if there are no digits the result is zero and the end pointer
is left at the first non-space character.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (first character not consumed)

Returns:        the value, as an int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
*endptr = str;
return(result);
}
730    
731    
732    
733    
734    /*************************************************
735    *            Convert UTF-8 string to value       *
736    *************************************************/
737    
738    /* This function takes one or more bytes that represents a UTF-8 character,
739    and returns the value of the character.
740    
741    Argument:
742      utf8bytes   a pointer to the byte vector
743      vptr        a pointer to an int to receive the value
744    
745    Returns:      >  0 => the number of bytes consumed
746                  -6 to 0 => malformed UTF-8 character at offset = (-return)
747    */
748    
749    #if !defined NOUTF8
750    
751    static int
752    utf82ord(unsigned char *utf8bytes, int *vptr)
753    {
754    int c = *utf8bytes++;
755    int d = c;
756    int i, j, s;
757    
758    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
759      {
760      if ((d & 0x80) == 0) break;
761      d <<= 1;
762      }
763    
764    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
765    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
766    
767    /* i now has a value in the range 1-5 */
768    
769    s = 6*i;
770    d = (c & utf8_table3[i]) << s;
771    
772    for (j = 0; j < i; j++)
773      {
774      c = *utf8bytes++;
775      if ((c & 0xc0) != 0x80) return -(j+1);
776      s -= 6;
777      d |= (c & 0x3f) << s;
778    }    }
779    
780    /* Check that encoding was the correct unique one */
781    
782    for (j = 0; j < utf8_table1_size; j++)
783      if (d <= utf8_table1[j]) break;
784    if (j != i) return -(i+1);
785    
786    /* Valid value */
787    
788    *vptr = d;
789    return i+1;
790    }
791    
792    #endif
793    
794    
795    
796    /*************************************************
797    *       Convert character value to UTF-8         *
798    *************************************************/
799    
800    /* This function takes an integer value in the range 0 - 0x7fffffff
801    and encodes it as a UTF-8 character in 0 to 6 bytes.
802    
803    Arguments:
804      cvalue     the character value
805      utf8bytes  pointer to buffer for result - at least 6 bytes long
806    
807    Returns:     number of characters placed in the buffer
808    */
809    
810    #if !defined NOUTF8
811    
812    static int
813    ord2utf8(int cvalue, uschar *utf8bytes)
814    {
815    register int i, j;
816    for (i = 0; i < utf8_table1_size; i++)
817      if (cvalue <= utf8_table1[i]) break;
818    utf8bytes += i;
819    for (j = i; j > 0; j--)
820     {
821     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
822     cvalue >>= 6;
823     }
824    *utf8bytes = utf8_table2[i] | cvalue;
825    return i + 1;
826  }  }
827    
828    #endif
829    
830    
831    
832    /*************************************************
833    *             Print character string             *
834    *************************************************/
835    
836  /* Character string printing function. */  /* Character string printing function. Must handle UTF-8 strings in utf8
837    mode. Yields number of characters printed. If handed a NULL file, just counts
838    chars without printing. */
839    
840  static void pchars(unsigned char *p, int length)  static int pchars(unsigned char *p, int length, FILE *f)
841  {  {
842  int c;  int c = 0;
843    int yield = 0;
844    
845  while (length-- > 0)  while (length-- > 0)
846    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
847      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
848      if (use_utf8)
849        {
850        int rc = utf82ord(p, &c);
851    
852        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
853          {
854          length -= rc - 1;
855          p += rc;
856          if (PRINTHEX(c))
857            {
858            if (f != NULL) fprintf(f, "%c", c);
859            yield++;
860            }
861          else
862            {
863            int n = 4;
864            if (f != NULL) fprintf(f, "\\x{%02x}", c);
865            yield += (n <= 0x000000ff)? 2 :
866                     (n <= 0x00000fff)? 3 :
867                     (n <= 0x0000ffff)? 4 :
868                     (n <= 0x000fffff)? 5 : 6;
869            }
870          continue;
871          }
872        }
873    #endif
874    
875       /* Not UTF-8, or malformed UTF-8  */
876    
877      c = *p++;
878      if (PRINTHEX(c))
879        {
880        if (f != NULL) fprintf(f, "%c", c);
881        yield++;
882        }
883      else
884        {
885        if (f != NULL) fprintf(f, "\\x%02x", c);
886        yield += 4;
887        }
888      }
889    
890    return yield;
891  }  }
892    
893    
894    
895  /* Alternative malloc function, to test functionality and show the size of the  /*************************************************
896  compiled re. */  *              Callout function                  *
897    *************************************************/
898    
899    /* Called from PCRE as a result of the (?C) item. We print out where we are in
900    the match. Yield zero unless more callouts than the fail count, or the callout
901    data is not zero. */
902    
903    static int callout(pcre_callout_block *cb)
904    {
905    FILE *f = (first_callout | callout_extra)? outfile : NULL;
906    int i, pre_start, post_start, subject_length;
907    
908    if (callout_extra)
909      {
910      fprintf(f, "Callout %d: last capture = %d\n",
911        cb->callout_number, cb->capture_last);
912    
913      for (i = 0; i < cb->capture_top * 2; i += 2)
914        {
915        if (cb->offset_vector[i] < 0)
916          fprintf(f, "%2d: <unset>\n", i/2);
917        else
918          {
919          fprintf(f, "%2d: ", i/2);
920          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
921            cb->offset_vector[i+1] - cb->offset_vector[i], f);
922          fprintf(f, "\n");
923          }
924        }
925      }
926    
927    /* Re-print the subject in canonical form, the first time or if giving full
928    datails. On subsequent calls in the same match, we use pchars just to find the
929    printed lengths of the substrings. */
930    
931    if (f != NULL) fprintf(f, "--->");
932    
933    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
934    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
935      cb->current_position - cb->start_match, f);
936    
937    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
938    
939    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
940      cb->subject_length - cb->current_position, f);
941    
942    if (f != NULL) fprintf(f, "\n");
943    
944    /* Always print appropriate indicators, with callout number if not already
945    shown. For automatic callouts, show the pattern offset. */
946    
947    if (cb->callout_number == 255)
948      {
949      fprintf(outfile, "%+3d ", cb->pattern_position);
950      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
951      }
952    else
953      {
954      if (callout_extra) fprintf(outfile, "    ");
955        else fprintf(outfile, "%3d ", cb->callout_number);
956      }
957    
958    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
959    fprintf(outfile, "^");
960    
961    if (post_start > 0)
962      {
963      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
964      fprintf(outfile, "^");
965      }
966    
967    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
968      fprintf(outfile, " ");
969    
970    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
971      pbuffer + cb->pattern_position);
972    
973    fprintf(outfile, "\n");
974    first_callout = 0;
975    
976    if (cb->mark != last_callout_mark)
977      {
978      fprintf(outfile, "Latest Mark: %s\n",
979        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
980      last_callout_mark = cb->mark;
981      }
982    
983    if (cb->callout_data != NULL)
984      {
985      int callout_data = *((int *)(cb->callout_data));
986      if (callout_data != 0)
987        {
988        fprintf(outfile, "Callout data = %d\n", callout_data);
989        return callout_data;
990        }
991      }
992    
993    return (cb->callout_number != callout_fail_id)? 0 :
994           (++callout_count >= callout_fail_count)? 1 : 0;
995    }
996    
997    
998    /*************************************************
999    *            Local malloc functions              *
1000    *************************************************/
1001    
1002    /* Alternative malloc function, to test functionality and save the size of a
1003    compiled re, which is the first store request that pcre_compile() makes. The
1004    show_malloc variable is set only during matching. */
1005    
1006  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1007  {  {
1008    void *block = malloc(size);
1009  gotten_store = size;  gotten_store = size;
1010  if (log_store)  if (first_gotten_store == 0) first_gotten_store = size;
1011    fprintf(outfile, "Memory allocation (code space): %d\n",  if (show_malloc)
1012      (int)((int)size - offsetof(real_pcre, code[0])));    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1013  return malloc(size);  return block;
1014  }  }
1015    
1016    static void new_free(void *block)
1017    {
1018    if (show_malloc)
1019      fprintf(outfile, "free             %p\n", block);
1020    free(block);
1021    }
1022    
1023    /* For recursion malloc/free, to test stacking calls */
1024    
1025    static void *stack_malloc(size_t size)
1026    {
1027    void *block = malloc(size);
1028    if (show_malloc)
1029      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1030    return block;
1031    }
1032    
1033    static void stack_free(void *block)
1034    {
1035    if (show_malloc)
1036      fprintf(outfile, "stack_free       %p\n", block);
1037    free(block);
1038    }
1039    
1040    
1041    /*************************************************
1042    *          Call pcre_fullinfo()                  *
1043    *************************************************/
1044    
1045  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
1046    
# Line 303  if ((rc = pcre_fullinfo(re, study, optio Line 1053  if ((rc = pcre_fullinfo(re, study, optio
1053    
1054    
1055    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or a 4-byte quantity. Any bits above the
low-order 2 (or 4) bytes of the argument are discarded.

Arguments:
  value      the value whose bytes are to be reversed
  n          2 to flip a 2-byte value; anything else is treated as 4 bytes

Returns:     the value with its low-order n bytes in the opposite order
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
return ((value & 0x000000ff) << 24) |
       ((value & 0x0000ff00) <<  8) |
       ((value & 0x00ff0000) >>  8) |
       ((value & 0xff000000) >> 24);
}
1069    
1070    
1071    
1072    
1073    /*************************************************
1074    *        Check match or recursion limit          *
1075    *************************************************/
1076    
1077    static int
1078    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1079      int start_offset, int options, int *use_offsets, int use_size_offsets,
1080      int flag, unsigned long int *limit, int errnumber, const char *msg)
1081    {
1082    int count;
1083    int min = 0;
1084    int mid = 64;
1085    int max = -1;
1086    
1087    extra->flags |= flag;
1088    
1089    for (;;)
1090      {
1091      *limit = mid;
1092    
1093      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1094        use_offsets, use_size_offsets);
1095    
1096      if (count == errnumber)
1097        {
1098        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1099        min = mid;
1100        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1101        }
1102    
1103      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1104                             count == PCRE_ERROR_PARTIAL)
1105        {
1106        if (mid == min + 1)
1107          {
1108          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1109          break;
1110          }
1111        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1112        max = mid;
1113        mid = (min + mid)/2;
1114        }
1115      else break;    /* Some other error */
1116      }
1117    
1118    extra->flags &= ~flag;
1119    return count;
1120    }
1121    
1122    
1123    
1124    /*************************************************
1125    *         Case-independent strncmp() function    *
1126    *************************************************/
1127    
1128    /*
1129    Arguments:
1130      s         first string
1131      t         second string
1132      n         number of characters to compare
1133    
1134    Returns:    < 0, = 0, or > 0, according to the comparison
1135    */
1136    
1137    static int
1138    strncmpic(uschar *s, uschar *t, int n)
1139    {
1140    while (n--)
1141      {
1142      int c = tolower(*s++) - tolower(*t++);
1143      if (c) return c;
1144      }
1145    return 0;
1146    }
1147    
1148    
1149    
1150    /*************************************************
1151    *         Check newline indicator                *
1152    *************************************************/
1153    
1154    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1155    a message and return 0 if there is no match.
1156    
1157    Arguments:
1158      p           points after the leading '<'
1159      f           file for error message
1160    
1161    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1162    */
1163    
1164    static int
1165    check_newline(uschar *p, FILE *f)
1166    {
1167    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1168    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1169    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1170    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1171    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1172    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1173    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1174    fprintf(f, "Unknown newline type at: <%s\n", p);
1175    return 0;
1176    }
1177    
1178    
1179    
/*************************************************
*             Usage function                     *
*************************************************/

/* Write the command line synopsis and the list of options to stdout. The
lines about readline(), -dfa and -p depend on how the program was built. */

static void
usage(void)
{
printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#ifdef SUPPORT_LIBREADLINE
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
printf("  -b       show compiled code (bytecode)\n");
printf("  -C       show PCRE compile-time options and exit\n");
printf("  -d       debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf("  -dfa     force DFA matching for all subjects\n");
#endif
printf("  -help    show usage information\n");
printf("  -i       show information about compiled patterns\n"
       "  -M       find MATCH_LIMIT minimum for each subject\n"
       "  -m       output memory used information\n"
       "  -o <n>   set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf("  -p       use POSIX interface\n");
#endif
printf("  -q       quiet: do not output PCRE version number at start\n");
printf("  -S <n>   set stack size to <n> megabytes\n");
printf("  -s       force each pattern to be studied at basic level\n"
       "  -s+      force each pattern to be studied, using JIT if available\n"
       "  -t       time compilation and execution\n");
printf("  -t <n>   time compilation and execution, repeating <n> times\n");
printf("  -tm      time execution (matching) only\n");
printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
}
1218    
1219    
1220    
1221    /*************************************************
1222    *                Main Program                    *
1223    *************************************************/
1224    
1225  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
1226  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 313  int main(int argc, char **argv) Line 1231  int main(int argc, char **argv)
1231  FILE *infile = stdin;  FILE *infile = stdin;
1232  int options = 0;  int options = 0;
1233  int study_options = 0;  int study_options = 0;
1234    int default_find_match_limit = FALSE;
1235  int op = 1;  int op = 1;
1236  int timeit = 0;  int timeit = 0;
1237    int timeitm = 0;
1238  int showinfo = 0;  int showinfo = 0;
1239  int showstore = 0;  int showstore = 0;
1240    int force_study = -1;
1241    int force_study_options = 0;
1242    int quiet = 0;
1243    int size_offsets = 45;
1244    int size_offsets_max;
1245    int *offsets = NULL;
1246    #if !defined NOPOSIX
1247  int posix = 0;  int posix = 0;
1248    #endif
1249  int debug = 0;  int debug = 0;
1250  int done = 0;  int done = 0;
1251  unsigned char buffer[30000];  int all_use_dfa = 0;
1252  unsigned char dbuffer[1024];  int yield = 0;
1253    int stack_size;
1254    
1255    pcre_jit_stack *jit_stack = NULL;
1256    
1257    
1258    /* These vectors store, end-to-end, a list of captured substring names. Assume
1259    that 1024 is plenty long enough for the few names we'll be testing. */
1260    
1261    uschar copynames[1024];
1262    uschar getnames[1024];
1263    
1264    uschar *copynamesptr;
1265    uschar *getnamesptr;
1266    
1267  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
1268    when I am debugging. They grow automatically when very long lines are read. */
1269    
1270    buffer = (unsigned char *)malloc(buffer_size);
1271    dbuffer = (unsigned char *)malloc(buffer_size);
1272    pbuffer = (unsigned char *)malloc(buffer_size);
1273    
1274    /* The outfile variable is static so that new_malloc can use it. */
1275    
1276  outfile = stdout;  outfile = stdout;
1277    
1278    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1279    library to translate CRLF into a single LF character. At least, that's what
1280    I've been told: never having used Windows I take this all on trust. Originally
1281    it set 0x8000, but then I was advised that _O_BINARY was better. */
1282    
1283    #if defined(_WIN32) || defined(WIN32)
1284    _setmode( _fileno( stdout ), _O_BINARY );
1285    #endif
1286    
1287  /* Scan options */  /* Scan options */
1288    
1289  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1290    {    {
1291    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    unsigned char *endptr;
1292      showstore = 1;  
1293    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    if (strcmp(argv[op], "-m") == 0) showstore = 1;
1294      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1295      else if (strcmp(argv[op], "-s+") == 0)
1296        {
1297        force_study = 1;
1298        force_study_options = PCRE_STUDY_JIT_COMPILE;
1299        }
1300      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1301      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1302    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1303    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1304      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1305    #if !defined NODFA
1306      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1307    #endif
1308      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1309          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1310            *endptr == 0))
1311        {
1312        op++;
1313        argc--;
1314        }
1315      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1316        {
1317        int both = argv[op][2] == 0;
1318        int temp;
1319        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1320                         *endptr == 0))
1321          {
1322          timeitm = temp;
1323          op++;
1324          argc--;
1325          }
1326        else timeitm = LOOPREPEAT;
1327        if (both) timeit = timeitm;
1328        }
1329      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1330          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1331            *endptr == 0))
1332        {
1333    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1334        printf("PCRE: -S not supported on this OS\n");
1335        exit(1);
1336    #else
1337        int rc;
1338        struct rlimit rlim;
1339        getrlimit(RLIMIT_STACK, &rlim);
1340        rlim.rlim_cur = stack_size * 1024 * 1024;
1341        rc = setrlimit(RLIMIT_STACK, &rlim);
1342        if (rc != 0)
1343          {
1344        printf("PCRE: setrlimit() failed with error %d\n", rc);
1345        exit(1);
1346          }
1347        op++;
1348        argc--;
1349    #endif
1350        }
1351    #if !defined NOPOSIX
1352    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1353    #endif
1354      else if (strcmp(argv[op], "-C") == 0)
1355        {
1356        int rc;
1357        unsigned long int lrc;
1358        printf("PCRE version %s\n", pcre_version());
1359        printf("Compiled with\n");
1360        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1361        printf("  %sUTF-8 support\n", rc? "" : "No ");
1362        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1363        printf("  %sUnicode properties support\n", rc? "" : "No ");
1364        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1365        if (rc)
1366          printf("  Just-in-time compiler support\n");
1367        else
1368          printf("  No just-in-time compiler support\n");
1369        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1370        /* Note that these values are always the ASCII values, even
1371        in EBCDIC environments. CR is 13 and NL is 10. */
1372        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1373          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1374          (rc == -2)? "ANYCRLF" :
1375          (rc == -1)? "ANY" : "???");
1376        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1377        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1378                                         "all Unicode newlines");
1379        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1380        printf("  Internal link size = %d\n", rc);
1381        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1382        printf("  POSIX malloc threshold = %d\n", rc);
1383        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1384        printf("  Default match limit = %ld\n", lrc);
1385        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1386        printf("  Default recursion depth limit = %ld\n", lrc);
1387        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1388        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1389        goto EXIT;
1390        }
1391      else if (strcmp(argv[op], "-help") == 0 ||
1392               strcmp(argv[op], "--help") == 0)
1393        {
1394        usage();
1395        goto EXIT;
1396        }
1397    else    else
1398      {      {
1399      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1400      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1401      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
1402             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
1403      }      }
1404    op++;    op++;
1405    argc--;    argc--;
1406    }    }
1407    
1408    /* Get the store for the offsets vector, and remember what it was */
1409    
1410    size_offsets_max = size_offsets;
1411    offsets = (int *)malloc(size_offsets_max * sizeof(int));
1412    if (offsets == NULL)
1413      {
1414      printf("** Failed to get %d bytes of memory for offsets vector\n",
1415        (int)(size_offsets_max * sizeof(int)));
1416      yield = 1;
1417      goto EXIT;
1418      }
1419    
1420  /* Sort out the input and output files */  /* Sort out the input and output files */
1421    
1422  if (argc > 1)  if (argc > 1)
1423    {    {
1424    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
1425    if (infile == NULL)    if (infile == NULL)
1426      {      {
1427      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1428      return 1;      yield = 1;
1429        goto EXIT;
1430      }      }
1431    }    }
1432    
1433  if (argc > 2)  if (argc > 2)
1434    {    {
1435    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1436    if (outfile == NULL)    if (outfile == NULL)
1437      {      {
1438      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1439      return 1;      yield = 1;
1440        goto EXIT;
1441      }      }
1442    }    }
1443    
1444  /* Set alternative malloc function */  /* Set alternative malloc function */
1445    
1446  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1447    pcre_free = new_free;
1448    pcre_stack_malloc = stack_malloc;
1449    pcre_stack_free = stack_free;
1450    
1451  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1452    
1453  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1454    
1455  /* Main loop */  /* Main loop */
1456    
# Line 391  while (!done) Line 1461  while (!done)
1461    
1462  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
1463    regex_t preg;    regex_t preg;
1464      int do_posix = 0;
1465  #endif  #endif
1466    
1467    const char *error;    const char *error;
1468      unsigned char *markptr;
1469    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1470    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
1471      const unsigned char *tables = NULL;
1472      unsigned long int true_size, true_study_size = 0;
1473      size_t size, regex_gotten_store;
1474      int do_allcaps = 0;
1475      int do_mark = 0;
1476    int do_study = 0;    int do_study = 0;
1477      int no_force_study = 0;
1478    int do_debug = debug;    int do_debug = debug;
1479    int do_G = 0;    int do_G = 0;
1480    int do_g = 0;    int do_g = 0;
1481    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1482    int do_showrest = 0;    int do_showrest = 0;
1483    int do_posix = 0;    int do_showcaprest = 0;
1484    int erroroffset, len, delimiter;    int do_flip = 0;
1485      int erroroffset, len, delimiter, poffset;
1486    
1487    if (infile == stdin) printf("  re> ");    use_utf8 = 0;
1488    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    debug_lengths = 1;
1489    
1490      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
1491    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1492      fflush(outfile);
1493    
1494    p = buffer;    p = buffer;
1495    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1496    if (*p == 0) continue;    if (*p == 0) continue;
1497    
1498    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1499    complete, read more. */  
1500      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1501        {
1502        unsigned long int magic, get_options;
1503        uschar sbuf[8];
1504        FILE *f;
1505    
1506        p++;
1507        pp = p + (int)strlen((char *)p);
1508        while (isspace(pp[-1])) pp--;
1509        *pp = 0;
1510    
1511        f = fopen((char *)p, "rb");
1512        if (f == NULL)
1513          {
1514          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1515          continue;
1516          }
1517    
1518        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1519    
1520        true_size =
1521          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1522        true_study_size =
1523          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1524    
1525        re = (real_pcre *)new_malloc(true_size);
1526        regex_gotten_store = first_gotten_store;
1527    
1528        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1529    
1530        magic = ((real_pcre *)re)->magic_number;
1531        if (magic != MAGIC_NUMBER)
1532          {
1533          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1534            {
1535            do_flip = 1;
1536            }
1537          else
1538            {
1539            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1540            fclose(f);
1541            continue;
1542            }
1543          }
1544    
1545        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1546          do_flip? " (byte-inverted)" : "", p);
1547    
1548        /* Need to know if UTF-8 for printing data strings */
1549    
1550        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1551        use_utf8 = (get_options & PCRE_UTF8) != 0;
1552    
1553        /* Now see if there is any following study data. */
1554    
1555        if (true_study_size != 0)
1556          {
1557          pcre_study_data *psd;
1558    
1559          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1560          extra->flags = PCRE_EXTRA_STUDY_DATA;
1561    
1562          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1563          extra->study_data = psd;
1564    
1565          if (fread(psd, 1, true_study_size, f) != true_study_size)
1566            {
1567            FAIL_READ:
1568            fprintf(outfile, "Failed to read data from %s\n", p);
1569            if (extra != NULL) pcre_free_study(extra);
1570            if (re != NULL) new_free(re);
1571            fclose(f);
1572            continue;
1573            }
1574          fprintf(outfile, "Study data loaded from %s\n", p);
1575          do_study = 1;     /* To get the data output if requested */
1576          }
1577        else fprintf(outfile, "No study data\n");
1578    
1579        fclose(f);
1580        goto SHOW_INFO;
1581        }
1582    
1583      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1584      the pattern; if it isn't complete, read more. */
1585    
1586    delimiter = *p++;    delimiter = *p++;
1587    
1588    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1589      {      {
1590      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1591      goto SKIP_DATA;      goto SKIP_DATA;
1592      }      }
1593    
1594    pp = p;    pp = p;
1595      poffset = (int)(p - buffer);
1596    
1597    for(;;)    for(;;)
1598      {      {
# Line 435  while (!done) Line 1603  while (!done)
1603        pp++;        pp++;
1604        }        }
1605      if (*pp != 0) break;      if (*pp != 0) break;
1606        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1607        {        {
1608        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1609        done = 1;        done = 1;
# Line 453  while (!done) Line 1612  while (!done)
1612      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1613      }      }
1614    
1615      /* The buffer may have moved while being extended; reset the start of data
1616      pointer to the correct relative point in the buffer. */
1617    
1618      p = buffer + poffset;
1619    
1620    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1621    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1622    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1623    
1624    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1625    
1626    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1627      for callouts. */
1628    
1629    *pp++ = 0;    *pp++ = 0;
1630      strcpy((char *)pbuffer, (char *)p);
1631    
1632    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1633    
1634    options = 0;    options = 0;
   study_options = 0;  
1635    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
1636    
1637    while (*pp != 0)    while (*pp != 0)
1638      {      {
1639      switch (*pp++)      switch (*pp++)
1640        {        {
1641          case 'f': options |= PCRE_FIRSTLINE; break;
1642        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1643        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1644        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1645        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1646        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1647    
1648        case '+': do_showrest = 1; break;        case '+':
1649          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1650          break;
1651    
1652          case '=': do_allcaps = 1; break;
1653        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1654          case 'B': do_debug = 1; break;
1655          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1656        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1657        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1658          case 'F': do_flip = 1; break;
1659        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1660        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1661          case 'J': options |= PCRE_DUPNAMES; break;
1662          case 'K': do_mark = 1; break;
1663        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1664          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1665    
1666  #if !defined NOPOSIX  #if !defined NOPOSIX
1667        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1668  #endif  #endif
1669    
1670        case 'S': do_study = 1; break;        case 'S':
1671          if (do_study == 0)
1672            {
1673            do_study = 1;
1674            if (*pp == '+')
1675              {
1676              study_options |= PCRE_STUDY_JIT_COMPILE;
1677              pp++;
1678              }
1679            }
1680          else
1681            {
1682            do_study = 0;
1683            no_force_study = 1;
1684            }
1685          break;
1686    
1687        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1688          case 'W': options |= PCRE_UCP; break;
1689        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1690          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1691          case 'Z': debug_lengths = 0; break;
1692          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1693          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1694    
1695          case 'T':
1696          switch (*pp++)
1697            {
1698            case '0': tables = tables0; break;
1699            case '1': tables = tables1; break;
1700    
1701            case '\r':
1702            case '\n':
1703            case ' ':
1704            case 0:
1705            fprintf(outfile, "** Missing table number after /T\n");
1706            goto SKIP_DATA;
1707    
1708            default:
1709            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1710            goto SKIP_DATA;
1711            }
1712          break;
1713    
1714        case 'L':        case 'L':
1715        ppp = pp;        ppp = pp;
1716        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1717          /* The zero (NUL) test is just in case this is an unterminated line. */
1718          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1719        *ppp = 0;        *ppp = 0;
1720        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1721          {          {
1722          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1723          goto SKIP_DATA;          goto SKIP_DATA;
1724          }          }
1725          locale_set = 1;
1726        tables = pcre_maketables();        tables = pcre_maketables();
1727        pp = ppp;        pp = ppp;
1728        break;        break;
1729    
1730        case '\n': case ' ': break;        case '>':
1731          to_file = pp;
1732          while (*pp != 0) pp++;
1733          while (isspace(pp[-1])) pp--;
1734          *pp = 0;
1735          break;
1736    
1737          case '<':
1738            {
1739            if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1740              {
1741              options |= PCRE_JAVASCRIPT_COMPAT;
1742              pp += 3;
1743              }
1744            else
1745              {
1746              int x = check_newline(pp, outfile);
1747              if (x == 0) goto SKIP_DATA;
1748              options |= x;
1749              while (*pp++ != '>');
1750              }
1751            }
1752          break;
1753    
1754          case '\r':                      /* So that it works in Windows */
1755          case '\n':
1756          case ' ':
1757          break;
1758    
1759        default:        default:
1760        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1761        goto SKIP_DATA;        goto SKIP_DATA;
# Line 524  while (!done) Line 1771  while (!done)
1771      {      {
1772      int rc;      int rc;
1773      int cflags = 0;      int cflags = 0;
1774    
1775      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1776      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1777        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1778        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1779        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1780        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1781        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1782    
1783        first_gotten_store = 0;
1784      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1785    
1786      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 533  while (!done) Line 1788  while (!done)
1788    
1789      if (rc != 0)      if (rc != 0)
1790        {        {
1791        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1792        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1793        goto SKIP_DATA;        goto SKIP_DATA;
1794        }        }
# Line 545  while (!done) Line 1800  while (!done)
1800  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1801    
1802      {      {
1803      if (timeit)      unsigned long int get_options;
1804    
1805        if (timeit > 0)
1806        {        {
1807        register int i;        register int i;
1808        clock_t time_taken;        clock_t time_taken;
1809        clock_t start_time = clock();        clock_t start_time = clock();
1810        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1811          {          {
1812          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1813          if (re != NULL) free(re);          if (re != NULL) free(re);
1814          }          }
1815        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1816        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1817          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1818          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1819        }        }
1820    
1821        first_gotten_store = 0;
1822      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1823    
1824      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 574  while (!done) Line 1832  while (!done)
1832          {          {
1833          for (;;)          for (;;)
1834            {            {
1835            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1836              {              {
1837              done = 1;              done = 1;
1838              goto CONTINUE;              goto CONTINUE;
# Line 588  while (!done) Line 1846  while (!done)
1846        goto CONTINUE;        goto CONTINUE;
1847        }        }
1848    
1849      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1850      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1851      returns only limited data. Check that it agrees with the newer one. */      lines. */
1852    
1853        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1854        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1855    
1856        /* Print information if required. There are now two info-returning
1857        functions. The old one has a limited interface and returns only limited
1858        data. Check that it agrees with the newer one. */
1859    
1860        if (log_store)
1861          fprintf(outfile, "Memory allocation (code space): %d\n",
1862            (int)(first_gotten_store -
1863                  sizeof(real_pcre) -
1864                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1865    
1866        /* Extract the size for possible writing before possibly flipping it,
1867        and remember the store that was got. */
1868    
1869        true_size = ((real_pcre *)re)->size;
1870        regex_gotten_store = first_gotten_store;
1871    
1872        /* If -s or /S was present, study the regex to generate additional info to
1873        help with the matching, unless the pattern has the SS option, which
1874        suppresses the effect of /S (used for a few test patterns where studying is
1875        never sensible). */
1876    
1877        if (do_study || (force_study >= 0 && !no_force_study))
1878          {
1879          if (timeit > 0)
1880            {
1881            register int i;
1882            clock_t time_taken;
1883            clock_t start_time = clock();
1884            for (i = 0; i < timeit; i++)
1885              extra = pcre_study(re, study_options | force_study_options, &error);
1886            time_taken = clock() - start_time;
1887            if (extra != NULL) pcre_free_study(extra);
1888            fprintf(outfile, "  Study time %.4f milliseconds\n",
1889              (((double)time_taken * 1000.0) / (double)timeit) /
1890                (double)CLOCKS_PER_SEC);
1891            }
1892          extra = pcre_study(re, study_options | force_study_options, &error);
1893          if (error != NULL)
1894            fprintf(outfile, "Failed to study: %s\n", error);
1895          else if (extra != NULL)
1896            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1897          }
1898    
1899        /* If /K was present, we set up for handling MARK data. */
1900    
1901        if (do_mark)
1902          {
1903          if (extra == NULL)
1904            {
1905            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1906            extra->flags = 0;
1907            }
1908          extra->mark = &markptr;
1909          extra->flags |= PCRE_EXTRA_MARK;
1910          }
1911    
1912        /* If the 'F' option was present, we flip the bytes of all the integer
1913        fields in the regex data block and the study block. This is to make it
1914        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1915        compiled on a different architecture. */
1916    
1917        if (do_flip)
1918          {
1919          real_pcre *rre = (real_pcre *)re;
1920          rre->magic_number =
1921            byteflip(rre->magic_number, sizeof(rre->magic_number));
1922          rre->size = byteflip(rre->size, sizeof(rre->size));
1923          rre->options = byteflip(rre->options, sizeof(rre->options));
1924          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1925          rre->top_bracket =
1926            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1927          rre->top_backref =
1928            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1929          rre->first_byte =
1930            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1931          rre->req_byte =
1932            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1933          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1934            sizeof(rre->name_table_offset));
1935          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1936            sizeof(rre->name_entry_size));
1937          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1938            sizeof(rre->name_count));
1939    
1940          if (extra != NULL)
1941            {
1942            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1943            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1944            rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1945            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1946            }
1947          }
1948    
1949        /* Extract information from the compiled data if required */
1950    
1951        SHOW_INFO:
1952    
1953        if (do_debug)
1954          {
1955          fprintf(outfile, "------------------------------------------------------------------\n");
1956          pcre_printint(re, outfile, debug_lengths);
1957          }
1958    
1959        /* We already have the options in get_options (see above) */
1960    
1961      if (do_showinfo)      if (do_showinfo)
1962        {        {
1963          unsigned long int all_options;
1964    #if !defined NOINFOCHECK
1965        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1966        int count, backrefmax, first_char, need_char;  #endif
1967        size_t size;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1968            hascrorlf;
1969        if (do_debug) print_internals(re);        int nameentrysize, namecount;
1970          const uschar *nametable;
1971    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &options);  
1972        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1973        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1974        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1975        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1976        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1977          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1978          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1979          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1980          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1981          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1982          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1983    
1984    #if !defined NOINFOCHECK
1985        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1986        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1987          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 620  while (!done) Line 1995  while (!done)
1995            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1996              first_char, old_first_char);              first_char, old_first_char);
1997    
1998          if (old_options != options) fprintf(outfile,          if (old_options != (int)get_options) fprintf(outfile,
1999            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2000              old_options);              get_options, old_options);
2001          }          }
2002    #endif
2003    
2004        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
2005          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2006          size, gotten_store);          (int)size, (int)regex_gotten_store);
2007    
2008        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
2009        if (backrefmax > 0)        if (backrefmax > 0)
2010          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
       if (options == 0) fprintf(outfile, "No options\n");  
         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
           ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
           ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
           ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
           ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
           ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
           ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
           ((options & PCRE_EXTRA) != 0)? " extra" : "",  
           ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
2011    
2012        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (namecount > 0)
2013          fprintf(outfile, "Case state changes\n");          {
2014            fprintf(outfile, "Named capturing subpatterns:\n");
2015            while (namecount-- > 0)
2016              {
2017              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
2018                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2019                GET2(nametable, 0));
2020              nametable += nameentrysize;
2021              }
2022            }
2023    
2024          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2025          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2026    
2027          all_options = ((real_pcre *)re)->options;
2028          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2029    
2030          if (get_options == 0) fprintf(outfile, "No options\n");
2031            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2032              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2033              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2034              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2035              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2036              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2037              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2038              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2039              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2040              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2041              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2042              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2043              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2044              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2045              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2046              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2047              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2048              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2049    
2050          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2051    
2052          switch (get_options & PCRE_NEWLINE_BITS)
2053            {
2054            case PCRE_NEWLINE_CR:
2055            fprintf(outfile, "Forced newline sequence: CR\n");
2056            break;
2057    
2058            case PCRE_NEWLINE_LF:
2059            fprintf(outfile, "Forced newline sequence: LF\n");
2060            break;
2061    
2062            case PCRE_NEWLINE_CRLF:
2063            fprintf(outfile, "Forced newline sequence: CRLF\n");
2064            break;
2065    
2066            case PCRE_NEWLINE_ANYCRLF:
2067            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2068            break;
2069    
2070            case PCRE_NEWLINE_ANY:
2071            fprintf(outfile, "Forced newline sequence: ANY\n");
2072            break;
2073    
2074            default:
2075            break;
2076            }
2077    
2078        if (first_char == -1)        if (first_char == -1)
2079          {          {
2080          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
2081          }          }
2082        else if (first_char < 0)        else if (first_char < 0)
2083          {          {
# Line 656  while (!done) Line 2085  while (!done)
2085          }          }
2086        else        else
2087          {          {
2088          if (isprint(first_char))          int ch = first_char & 255;
2089            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2090              "" : " (caseless)";
2091            if (PRINTHEX(ch))
2092              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2093          else          else
2094            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
2095          }          }
2096    
2097        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 2100  while (!done)
2100          }          }
2101        else        else
2102          {          {
2103          if (isprint(need_char))          int ch = need_char & 255;
2104            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2105              "" : " (caseless)";
2106            if (PRINTHEX(ch))
2107              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2108          else          else
2109            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2110          }          }
       }  
2111    
2112      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
2113      help with the matching. */        value, but it varies, depending on the computer architecture, and
2114          so messes up the test suite. (And with the /F option, it might be
2115          flipped.) If study was forced by an external -s, don't show this
2116          information unless -i or -d was also present. This means that, except
2117          when auto-callouts are involved, the output from runs with and without
2118          -s should be identical. */
2119    
2120      if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
       {  
       if (timeit)  
2121          {          {
2122          register int i;          if (extra == NULL)
2123          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
2124          clock_t start_time = clock();          else
2125          for (i = 0; i < LOOPREPEAT; i++)            {
2126            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
2127          time_taken = clock() - start_time;            int minlength;
2128          if (extra != NULL) free(extra);  
2129          fprintf(outfile, "  Study time %.3f milliseconds\n",            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2130            ((double)time_taken * 1000.0)/            fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2131            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
2132              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2133              if (start_bits == NULL)
2134                fprintf(outfile, "No set of starting bytes\n");
2135              else
2136                {
2137                int i;
2138                int c = 24;
2139                fprintf(outfile, "Starting byte set: ");
2140                for (i = 0; i < 256; i++)
2141                  {
2142                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
2143                    {
2144                    if (c > 75)
2145                      {
2146                      fprintf(outfile, "\n  ");
2147                      c = 2;
2148                      }
2149                    if (PRINTHEX(i) && i != ' ')
2150                      {
2151                      fprintf(outfile, "%c ", i);
2152                      c += 2;
2153                      }
2154                    else
2155                      {
2156                      fprintf(outfile, "\\x%02x ", i);
2157                      c += 5;
2158                      }
2159                    }
2160                  }
2161                fprintf(outfile, "\n");
2162                }
2163              }
2164    
2165            /* Show this only if the JIT was set by /S, not by -s. */
2166    
2167            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2168              {
2169              int jit;
2170              new_info(re, extra, PCRE_INFO_JIT, &jit);
2171              if (jit)
2172                fprintf(outfile, "JIT study was successful\n");
2173              else
2174    #ifdef SUPPORT_JIT
2175                fprintf(outfile, "JIT study was not successful\n");
2176    #else
2177                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2178    #endif
2179              }
2180          }          }
2181          }
2182    
2183        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
2184        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
2185          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
2186    
2187        else if (do_showinfo)      if (to_file != NULL)
2188          {
2189          FILE *f = fopen((char *)to_file, "wb");
2190          if (f == NULL)
2191            {
2192            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2193            }
2194          else
2195          {          {
2196          uschar *start_bits = NULL;          uschar sbuf[8];
2197          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (uschar)((true_size >> 24) & 255);
2198          if (start_bits == NULL)          sbuf[1] = (uschar)((true_size >> 16) & 255);
2199            fprintf(outfile, "No starting character set\n");          sbuf[2] = (uschar)((true_size >>  8) & 255);
2200            sbuf[3] = (uschar)((true_size) & 255);
2201    
2202            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2203            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2204            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2205            sbuf[7] = (uschar)((true_study_size) & 255);
2206    
2207            if (fwrite(sbuf, 1, 8, f) < 8 ||
2208                fwrite(re, 1, true_size, f) < true_size)
2209              {
2210              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2211              }
2212          else          else
2213            {            {
2214            int i;            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2215            int c = 24;  
2216            fprintf(outfile, "Starting character set: ");            /* If there is study data, write it. */
2217            for (i = 0; i < 256; i++)  
2218              if (extra != NULL)
2219              {              {
2220              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
2221                    true_study_size)
2222                {                {
2223                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
2224                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
2225                }                }
2226                else fprintf(outfile, "Study data written to %s\n", to_file);
2227              }              }
           fprintf(outfile, "\n");  
2228            }            }
2229            fclose(f);
2230            }
2231    
2232          new_free(re);
2233          if (extra != NULL) pcre_free_study(extra);
2234          if (locale_set)
2235            {
2236            new_free((void *)tables);
2237            setlocale(LC_CTYPE, "C");
2238            locale_set = 0;
2239          }          }
2240          continue;  /* With next regex */
2241        }        }
2242      }      }        /* End of non-POSIX compile */
2243    
2244    /* Read data lines and test them */    /* Read data lines and test them */
2245    
2246    for (;;)    for (;;)
2247      {      {
2248      unsigned char *q;      uschar *q;
2249      unsigned char *bptr = dbuffer;      uschar *bptr;
2250        int *use_offsets = offsets;
2251        int use_size_offsets = size_offsets;
2252        int callout_data = 0;
2253        int callout_data_set = 0;
2254      int count, c;      int count, c;
2255      int copystrings = 0;      int copystrings = 0;
2256        int find_match_limit = default_find_match_limit;
2257      int getstrings = 0;      int getstrings = 0;
2258      int getlist = 0;      int getlist = 0;
2259      int gmatched = 0;      int gmatched = 0;
2260      int start_offset = 0;      int start_offset = 0;
2261        int start_offset_sign = 1;
2262      int g_notempty = 0;      int g_notempty = 0;
2263      int offsets[45];      int use_dfa = 0;
     int size_offsets = sizeof(offsets)/sizeof(int);  
2264    
2265      options = 0;      options = 0;
2266    
2267      if (infile == stdin) printf("data> ");      *copynames = 0;
2268      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
2269    
2270        copynamesptr = copynames;
2271        getnamesptr = getnames;
2272    
2273        pcre_callout = callout;
2274        first_callout = 1;
2275        last_callout_mark = NULL;
2276        callout_extra = 0;
2277        callout_count = 0;
2278        callout_fail_count = 999999;
2279        callout_fail_id = -1;
2280        show_malloc = 0;
2281    
2282        if (extra != NULL) extra->flags &=
2283          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2284    
2285        len = 0;
2286        for (;;)
2287        {        {
2288        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2289        goto CONTINUE;          {
2290            if (len > 0)    /* Reached EOF without hitting a newline */
2291              {
2292              fprintf(outfile, "\n");
2293              break;
2294              }
2295            done = 1;
2296            goto CONTINUE;
2297            }
2298          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2299          len = (int)strlen((char *)buffer);
2300          if (buffer[len-1] == '\n') break;
2301        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2302    
     len = (int)strlen((char *)buffer);  
2303      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2304      buffer[len] = 0;      buffer[len] = 0;
2305      if (len == 0) break;      if (len == 0) break;
# Line 772  while (!done) Line 2307  while (!done)
2307      p = buffer;      p = buffer;
2308      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2309    
2310      q = dbuffer;      bptr = q = dbuffer;
2311      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2312        {        {
2313        int i = 0;        int i = 0;
2314        int n = 0;        int n = 0;
2315    
2316        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
2317          {          {
2318          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 793  while (!done) Line 2329  while (!done)
2329          c -= '0';          c -= '0';
2330          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2331            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2332    
2333    #if !defined NOUTF8
2334            if (use_utf8 && c > 255)
2335              {
2336              unsigned char buff8[8];
2337              int ii, utn;
2338              utn = ord2utf8(c, buff8);
2339              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2340              c = buff8[ii];   /* Last byte */
2341              }
2342    #endif
2343          break;          break;
2344    
2345          case 'x':          case 'x':
2346    
2347            /* Handle \x{..} specially - new Perl thing for utf8 */
2348    
2349    #if !defined NOUTF8
2350            if (*p == '{')
2351              {
2352              unsigned char *pt = p;
2353              c = 0;
2354    
2355              /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2356              when isxdigit() is a macro that refers to its argument more than
2357              once. This is banned by the C Standard, but apparently happens in at
2358              least one MacOS environment. */
2359    
2360              for (pt++; isxdigit(*pt); pt++)
2361                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2362              if (*pt == '}')
2363                {
2364                unsigned char buff8[8];
2365                int ii, utn;
2366                if (use_utf8)
2367                  {
2368                  utn = ord2utf8(c, buff8);
2369                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2370                  c = buff8[ii];   /* Last byte */
2371                  }
2372                else
2373                 {
2374                 if (c > 255)
2375                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2376                     "UTF-8 mode is not enabled.\n"
2377                     "** Truncation will probably give the wrong result.\n", c);
2378                 }
2379                p = pt + 1;
2380                break;
2381                }
2382              /* Not correct form; fall through */
2383              }
2384    #endif
2385    
2386            /* Ordinary \x */
2387    
2388          c = 0;          c = 0;
2389          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2390            {            {
2391            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2392            p++;            p++;
2393            }            }
2394          break;          break;
2395    
2396          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
2397          p--;          p--;
2398          continue;          continue;
2399    
2400            case '>':
2401            if (*p == '-')
2402              {
2403              start_offset_sign = -1;
2404              p++;
2405              }
2406            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2407            start_offset *= start_offset_sign;
2408            continue;
2409    
2410          case 'A':  /* Option setting */          case 'A':  /* Option setting */
2411          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
2412          continue;          continue;
# Line 817  while (!done) Line 2416  while (!done)
2416          continue;          continue;
2417    
2418          case 'C':          case 'C':
2419          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
2420          copystrings |= 1 << n;            {
2421              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2422              copystrings |= 1 << n;
2423              }
2424            else if (isalnum(*p))
2425              {
2426              uschar *npp = copynamesptr;
2427              while (isalnum(*p)) *npp++ = *p++;
2428              *npp++ = 0;
2429              *npp = 0;
2430              n = pcre_get_stringnumber(re, (char *)copynamesptr);
2431              if (n < 0)
2432                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2433              copynamesptr = npp;
2434              }
2435            else if (*p == '+')
2436              {
2437              callout_extra = 1;
2438              p++;
2439              }
2440            else if (*p == '-')
2441              {
2442              pcre_callout = NULL;
2443              p++;
2444              }
2445            else if (*p == '!')
2446              {
2447              callout_fail_id = 0;
2448              p++;
2449              while(isdigit(*p))
2450                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2451              callout_fail_count = 0;
2452              if (*p == '!')
2453                {
2454                p++;
2455                while(isdigit(*p))
2456                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2457                }
2458              }
2459            else if (*p == '*')
2460              {
2461              int sign = 1;
2462              callout_data = 0;
2463              if (*(++p) == '-') { sign = -1; p++; }
2464              while(isdigit(*p))
2465                callout_data = callout_data * 10 + *p++ - '0';
2466              callout_data *= sign;
2467              callout_data_set = 1;
2468              }
2469            continue;
2470    
2471    #if !defined NODFA
2472            case 'D':
2473    #if !defined NOPOSIX
2474            if (posix || do_posix)
2475              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2476            else
2477    #endif
2478              use_dfa = 1;
2479            continue;
2480    #endif
2481    
2482    #if !defined NODFA
2483            case 'F':
2484            options |= PCRE_DFA_SHORTEST;
2485          continue;          continue;
2486    #endif
2487    
2488          case 'G':          case 'G':
2489            if (isdigit(*p))
2490              {
2491              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2492              getstrings |= 1 << n;
2493              }
2494            else if (isalnum(*p))
2495              {
2496              uschar *npp = getnamesptr;
2497              while (isalnum(*p)) *npp++ = *p++;
2498              *npp++ = 0;
2499              *npp = 0;
2500              n = pcre_get_stringnumber(re, (char *)getnamesptr);
2501              if (n < 0)
2502                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2503              getnamesptr = npp;
2504              }
2505            continue;
2506    
2507            case 'J':
2508          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
2509          getstrings |= 1 << n;          if (extra != NULL
2510                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2511                && extra->executable_jit != NULL)
2512              {
2513              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2514              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2515              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2516              }
2517          continue;          continue;
2518    
2519          case 'L':          case 'L':
2520          getlist = 1;          getlist = 1;
2521          continue;          continue;
2522    
2523            case 'M':
2524            find_match_limit = 1;
2525            continue;
2526    
2527          case 'N':          case 'N':
2528          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2529              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2530            else
2531              options |= PCRE_NOTEMPTY;
2532          continue;          continue;
2533    
2534          case 'O':          case 'O':
2535          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
2536          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
2537              {
2538              size_offsets_max = n;
2539              free(offsets);
2540              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2541              if (offsets == NULL)
2542                {
2543                printf("** Failed to get %d bytes of memory for offsets vector\n",
2544                  (int)(size_offsets_max * sizeof(int)));
2545                yield = 1;
2546                goto EXIT;
2547                }
2548              }
2549            use_size_offsets = n;
2550            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
2551            continue;
2552    
2553            case 'P':
2554            options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2555              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2556            continue;
2557    
2558            case 'Q':
2559            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2560            if (extra == NULL)
2561              {
2562              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2563              extra->flags = 0;
2564              }
2565            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2566            extra->match_limit_recursion = n;
2567            continue;
2568    
2569            case 'q':
2570            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2571            if (extra == NULL)
2572              {
2573              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2574              extra->flags = 0;
2575              }
2576            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2577            extra->match_limit = n;
2578            continue;
2579    
2580    #if !defined NODFA
2581            case 'R':
2582            options |= PCRE_DFA_RESTART;
2583            continue;
2584    #endif
2585    
2586            case 'S':
2587            show_malloc = 1;
2588            continue;
2589    
2590            case 'Y':
2591            options |= PCRE_NO_START_OPTIMIZE;
2592          continue;          continue;
2593    
2594          case 'Z':          case 'Z':
2595          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2596          continue;          continue;
2597    
2598            case '?':
2599            options |= PCRE_NO_UTF8_CHECK;
2600            continue;
2601    
2602            case '<':
2603              {
2604              int x = check_newline(p, outfile);
2605              if (x == 0) goto NEXT_DATA;
2606              options |= x;
2607              while (*p++ != '>');
2608              }
2609            continue;
2610          }          }
2611        *q++ = c;        *q++ = c;
2612        }        }
2613      *q = 0;      *q = 0;
2614      len = q - dbuffer;      len = (int)(q - dbuffer);
2615    
2616        /* Move the data to the end of the buffer so that a read over the end of
2617        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2618        we are using the POSIX interface, we must include the terminating zero. */
2619    
2620    #if !defined NOPOSIX
2621        if (posix || do_posix)
2622          {
2623          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2624          bptr += buffer_size - len - 1;
2625          }
2626        else
2627    #endif
2628          {
2629          memmove(bptr + buffer_size - len, bptr, len);
2630          bptr += buffer_size - len;
2631          }
2632    
2633        if ((all_use_dfa || use_dfa) && find_match_limit)
2634          {
2635          printf("**Match limit not relevant for DFA matching: ignored\n");
2636          find_match_limit = 0;
2637          }
2638    
2639      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2640      support timing. */      support timing or playing with the match limit or callout data. */
2641    
2642  #if !defined NOPOSIX  #if !defined NOPOSIX
2643      if (posix || do_posix)      if (posix || do_posix)
2644        {        {
2645        int rc;        int rc;
2646        int eflags = 0;        int eflags = 0;
2647        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = NULL;
2648          if (use_size_offsets > 0)
2649            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2650        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2651        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2652          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2653    
2654        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2655    
2656        if (rc != 0)        if (rc != 0)
2657          {          {
2658          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2659          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2660          }          }
2661          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2662                  != 0)
2663            {
2664            fprintf(outfile, "Matched with REG_NOSUB\n");
2665            }
2666        else        else
2667          {          {
2668          size_t i;          size_t i;
2669          for (i = 0; i < size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2670            {            {
2671            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2672              {              {
2673              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
2674              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2675                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2676              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2677              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
2678                {                {
2679                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
2680                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2681                    outfile);
2682                fprintf(outfile, "\n");                fprintf(outfile, "\n");
2683                }                }
2684              }              }
2685            }            }
2686          }          }
2687          free(pmatch);
2688        }        }
2689    
2690      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 896  while (!done) Line 2694  while (!done)
2694    
2695      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2696        {        {
2697        if (timeit)        markptr = NULL;
2698    
2699          if (timeitm > 0)
2700          {          {
2701          register int i;          register int i;
2702          clock_t time_taken;          clock_t time_taken;
2703          clock_t start_time = clock();          clock_t start_time = clock();
2704          for (i = 0; i < LOOPREPEAT; i++)  
2705    #if !defined NODFA
2706            if (all_use_dfa || use_dfa)
2707              {
2708              int workspace[1000];
2709              for (i = 0; i < timeitm; i++)
2710                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2711                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2712                  sizeof(workspace)/sizeof(int));
2713              }
2714            else
2715    #endif
2716    
2717            for (i = 0; i < timeitm; i++)
2718            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2719              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2720    
2721          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2722          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2723            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
2724            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
2725            }
2726    
2727          /* If find_match_limit is set, we want to do repeated matches with
2728          varying limits in order to find the minimum value for the match limit and
2729          for the recursion limit. The match limits are relevant only to the normal
2730          running of pcre_exec(), so disable the JIT optimization. This makes it
2731          possible to run the same set of tests with and without JIT externally
2732          requested. */
2733    
2734          if (find_match_limit)
2735            {
2736            if (extra == NULL)
2737              {
2738              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2739              extra->flags = 0;
2740              }
2741            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2742    
2743            (void)check_match_limit(re, extra, bptr, len, start_offset,
2744              options|g_notempty, use_offsets, use_size_offsets,
2745              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2746              PCRE_ERROR_MATCHLIMIT, "match()");
2747    
2748            count = check_match_limit(re, extra, bptr, len, start_offset,
2749              options|g_notempty, use_offsets, use_size_offsets,
2750              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2751              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2752          }          }
2753    
2754        count = pcre_exec(re, extra, (char *)bptr, len,        /* If callout_data is set, use the interface with additional data */
2755          start_offset, options | g_notempty, offsets, size_offsets);  
2756          else if (callout_data_set)
2757            {
2758            if (extra == NULL)
2759              {
2760              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2761              extra->flags = 0;
2762              }
2763            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2764            extra->callout_data = &callout_data;
2765            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2766              options | g_notempty, use_offsets, use_size_offsets);
2767            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2768            }
2769    
2770          /* The normal case is just to do the match once, with the default
2771          value of match_limit. */
2772    
2773    #if !defined NODFA
2774          else if (all_use_dfa || use_dfa)
2775            {
2776            int workspace[1000];
2777            count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2778              options | g_notempty, use_offsets, use_size_offsets, workspace,
2779              sizeof(workspace)/sizeof(int));
2780            if (count == 0)
2781              {
2782              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2783              count = use_size_offsets/2;
2784              }
2785            }
2786    #endif
2787    
2788        if (count == 0)        else
2789          {          {
2790          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2791          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2792            if (count == 0)
2793              {
2794              fprintf(outfile, "Matched, but too many substrings\n");
2795              count = use_size_offsets/3;
2796              }
2797          }          }
2798    
2799        /* Matched */        /* Matched */
2800    
2801        if (count >= 0)        if (count >= 0)
2802          {          {
2803          int i;          int i, maxcount;
2804    
2805    #if !defined NODFA
2806            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2807    #endif
2808              maxcount = use_size_offsets/3;
2809    
2810            /* This is a check against a lunatic return value. */
2811    
2812            if (count > maxcount)
2813              {
2814              fprintf(outfile,
2815                "** PCRE error: returned count %d is too big for offset size %d\n",
2816                count, use_size_offsets);
2817              count = use_size_offsets/3;
2818              if (do_g || do_G)
2819                {
2820                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2821                do_g = do_G = FALSE;        /* Break g/G loop */
2822                }
2823              }
2824    
2825            /* do_allcaps requests showing of all captures in the pattern, to check
2826            unset ones at the end. */
2827    
2828            if (do_allcaps)
2829              {
2830              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2831              count++;   /* Allow for full match */
2832              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2833              }
2834    
2835            /* Output the captured substrings */
2836    
2837          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2838            {            {
2839            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2840                {
2841                if (use_offsets[i] != -1)
2842                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2843                    use_offsets[i], i);
2844                if (use_offsets[i+1] != -1)
2845                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2846                    use_offsets[i+1], i+1);
2847              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2848                }
2849            else            else
2850              {              {
2851              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2852              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2853                  use_offsets[i+1] - use_offsets[i], outfile);
2854              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2855              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
2856                {                {
2857                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
2858                  {                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2859                  fprintf(outfile, " 0+ ");                  outfile);
2860                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                fprintf(outfile, "\n");
                 fprintf(outfile, "\n");  
                 }  
2861                }                }
2862              }              }
2863            }            }
2864    
2865            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2866    
2867          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2868            {            {
2869            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2870              {              {
2871              char copybuffer[16];              char copybuffer[256];
2872              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2873                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2874              if (rc < 0)              if (rc < 0)
2875                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 959  while (!done) Line 2878  while (!done)
2878              }              }
2879            }            }
2880    
2881            for (copynamesptr = copynames;
2882                 *copynamesptr != 0;
2883                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2884              {
2885              char copybuffer[256];
2886              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2887                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2888              if (rc < 0)
2889                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2890              else
2891                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2892              }
2893    
2894          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2895            {            {
2896            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
2897              {              {
2898              const char *substring;              const char *substring;
2899              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2900                i, &substring);                i, &substring);
2901              if (rc < 0)              if (rc < 0)
2902                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
2903              else              else
2904                {                {
2905                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2906                free((void *)substring);                pcre_free_substring(substring);
2907                }                }
2908              }              }
2909            }            }
2910    
2911            for (getnamesptr = getnames;
2912                 *getnamesptr != 0;
2913                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2914              {
2915              const char *substring;
2916              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2917                count, (char *)getnamesptr, &substring);
2918              if (rc < 0)
2919                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2920              else
2921                {
2922                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2923                pcre_free_substring(substring);
2924                }
2925              }
2926    
2927          if (getlist)          if (getlist)
2928            {            {
2929            const char **stringlist;            const char **stringlist;
2930            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2931              &stringlist);              &stringlist);
2932            if (rc < 0)            if (rc < 0)
2933              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 989  while (!done) Line 2937  while (!done)
2937                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2938              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2939                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
2940              free((void *)stringlist);              pcre_free_substring_list(stringlist);
2941              }              }
2942            }            }
2943          }          }
2944    
2945          /* There was a partial match */
2946    
2947          else if (count == PCRE_ERROR_PARTIAL)
2948            {
2949            if (markptr == NULL) fprintf(outfile, "Partial match");
2950              else fprintf(outfile, "Partial match, mark=%s", markptr);
2951            if (use_size_offsets > 1)
2952              {
2953              fprintf(outfile, ": ");
2954              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2955                outfile);
2956              }
2957            fprintf(outfile, "\n");
2958            break;  /* Out of the /g loop */
2959            }
2960    
2961        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2962        PCRE_NOTEMPTY after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2963        We want to advance the start offset, and continue. Fudge the offset        to advance the start offset, and continue. We won't be at the end of the
2964        values to achieve this. We won't be at the end of the string - that        string - that was checked before setting g_notempty.
2965        was checked before setting PCRE_NOTEMPTY. */  
2966          Complication arises in the case when the newline convention is "any",
2967          "crlf", or "anycrlf". If the previous match was at the end of a line
2968          terminated by CRLF, an advance of one character just passes the \r,
2969          whereas we should prefer the longer newline sequence, as does the code in
2970          pcre_exec(). Fudge the offset value to achieve this. We check for a
2971          newline setting in the pattern; if none was set, use pcre_config() to
2972          find the default.
2973    
2974          Otherwise, in the case of UTF-8 matching, the advance must be one
2975          character, not one byte. */
2976    
2977        else        else
2978          {          {
2979          if (g_notempty != 0)          if (g_notempty != 0)
2980            {            {
2981            offsets[0] = start_offset;            int onechar = 1;
2982            offsets[1] = start_offset + 1;            unsigned int obits = ((real_pcre *)re)->options;
2983              use_offsets[0] = start_offset;
2984              if ((obits & PCRE_NEWLINE_BITS) == 0)
2985                {
2986                int d;
2987                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2988                /* Note that these values are always the ASCII ones, even in
2989                EBCDIC environments. CR = 13, NL = 10. */
2990                obits = (d == 13)? PCRE_NEWLINE_CR :
2991                        (d == 10)? PCRE_NEWLINE_LF :
2992                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2993                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2994                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2995                }
2996              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2997                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2998                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2999                  &&
3000                  start_offset < len - 1 &&
3001                  bptr[start_offset] == '\r' &&
3002                  bptr[start_offset+1] == '\n')
3003                onechar++;
3004              else if (use_utf8)
3005                {
3006                while (start_offset + onechar < len)
3007                  {
3008                  if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3009                  onechar++;
3010                  }
3011                }
3012              use_offsets[1] = start_offset + onechar;
3013            }            }
3014          else          else
3015            {            {
3016            if (gmatched == 0)   /* Error if no previous matches */            switch(count)
3017              {              {
3018              if (count == -1) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3019                else fprintf(outfile, "Error %d\n", count);              if (gmatched == 0)
3020                  {
3021                  if (markptr == NULL) fprintf(outfile, "No match\n");
3022                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3023                  }
3024                break;
3025    
3026                case PCRE_ERROR_BADUTF8:
3027                case PCRE_ERROR_SHORTUTF8:
3028                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3029                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3030                if (use_size_offsets >= 2)
3031                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3032                    use_offsets[1]);
3033                fprintf(outfile, "\n");
3034                break;
3035    
3036                default:
3037                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3038                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3039                else
3040                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3041                break;
3042              }              }
3043    
3044            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3045            }            }
3046          }          }
# Line 1023  while (!done) Line 3050  while (!done)
3050        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3051    
3052        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3053        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3054        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
3055        we set PCRE_NOTEMPTY and try the match again at the same point. If this        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3056        fails (picked up above) we advance to the next character. */        same point. If this fails (picked up above) we advance to the next
3057          character. */
3058    
3059        g_notempty = 0;        g_notempty = 0;
3060        if (offsets[0] == offsets[1])  
3061          if (use_offsets[0] == use_offsets[1])
3062          {          {
3063          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
3064          g_notempty = PCRE_NOTEMPTY;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3065          }          }
3066    
3067        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
3068    
3069        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
3070    
3071        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
3072    
3073        else        else
3074          {          {
3075          bptr += offsets[1];          bptr += use_offsets[1];
3076          len -= offsets[1];          len -= use_offsets[1];
3077          }          }
3078        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
3079    
3080        NEXT_DATA: continue;
3081      }    /* End of loop for data lines */      }    /* End of loop for data lines */
3082    
3083    CONTINUE:    CONTINUE:
# Line 1055  while (!done) Line 3086  while (!done)
3086    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
3087  #endif  #endif
3088    
3089    if (re != NULL) free(re);    if (re != NULL) new_free(re);
3090    if (extra != NULL) free(extra);    if (extra != NULL) pcre_free_study(extra);
3091    if (tables != NULL)    if (locale_set)
3092      {      {
3093      free((void *)tables);      new_free((void *)tables);
3094      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3095        locale_set = 0;
3096        }
3097      if (jit_stack != NULL)
3098        {
3099        pcre_jit_stack_free(jit_stack);
3100        jit_stack = NULL;
3101      }      }
3102    }    }
3103    
3104  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
3105  return 0;  
3106    EXIT:
3107    
3108    if (infile != NULL && infile != stdin) fclose(infile);
3109    if (outfile != NULL && outfile != stdout) fclose(outfile);
3110    
3111    free(buffer);
3112    free(dbuffer);
3113    free(pbuffer);
3114    free(offsets);
3115    
3116    return yield;
3117  }  }
3118    
3119  /* End */  /* End of pcretest.c */

Legend:
Removed from v.43  
changed lines
  Added in v.773

  ViewVC Help
Powered by ViewVC 1.1.5