/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 73 by nigel, Sat Feb 24 21:40:30 2007 UTC revision 580 by ph10, Fri Nov 26 11:16:43 2010 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 46  been extended and consequently is now ra
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90    #else
91    #include <sys/time.h>          /* These two includes are needed */
92    #include <sys/resource.h>      /* for setrlimit(). */
93    #define INPUT_MODE   "rb"
94    #define OUTPUT_MODE  "wb"
95    #endif
96    
 /* We need the internal info for displaying the results of pcre_study(). Also  
 for getting the opcodes for showing compiled code. */  
97    
98  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
99  #include "internal.h"  displaying the results of pcre_study() and we also need to know about the
100    internal macros, structures, and other internal data values; pcretest has
101    "inside information" compared to a program that strictly follows the PCRE API.
102    
103    Although pcre_internal.h does itself include pcre.h, we explicitly include it
104    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105    appropriately for an application, not for building PCRE. */
106    
107    #include "pcre.h"
108    #include "pcre_internal.h"
109    
110    /* We need access to some of the data tables that PCRE uses. So as not to have
111    to keep two copies, we include the source file here, changing the names of the
112    external symbols to prevent clashes. */
113    
114    #define _pcre_ucp_gentype      ucp_gentype
115    #define _pcre_utf8_table1      utf8_table1
116    #define _pcre_utf8_table1_size utf8_table1_size
117    #define _pcre_utf8_table2      utf8_table2
118    #define _pcre_utf8_table3      utf8_table3
119    #define _pcre_utf8_table4      utf8_table4
120    #define _pcre_utt              utt
121    #define _pcre_utt_size         utt_size
122    #define _pcre_utt_names        utt_names
123    #define _pcre_OP_lengths       OP_lengths
124    
125    #include "pcre_tables.c"
126    
127    /* We also need the pcre_printint() function for printing out compiled
128    patterns. This function is in a separate file so that it can be included in
129    pcre_compile.c when that module is compiled with debugging enabled. It needs to
130    know which case is being compiled. */
131    
132    #define COMPILING_PCRETEST
133    #include "pcre_printint.src"
134    
135    /* The definition of the macro PRINTABLE, which determines whether to print an
136    output character as-is or as a hex value when showing compiled patterns, is
137    contained in the printint.src file. We use it here also, in cases when the
138    locale has not been explicitly changed, so as to get consistent output from
139    systems that differ in their output from isprint() even in the "C" locale. */
140    
141    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142    
143  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
144  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 148  Makefile. */
148  #include "pcreposix.h"  #include "pcreposix.h"
149  #endif  #endif
150    
151    /* It is also possible, for the benefit of the version currently imported into
152    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153    interface to the DFA matcher (NODFA), and without the doublecheck of the old
154    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155    UTF8 support if PCRE is built without it. */
156    
157    #ifndef SUPPORT_UTF8
158    #ifndef NOUTF8
159    #define NOUTF8
160    #endif
161    #endif
162    
163    
164    /* Other parameters */
165    
166  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
167  #ifdef CLK_TCK  #ifdef CLK_TCK
168  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 171  Makefile. */
171  #endif  #endif
172  #endif  #endif
173    
174  #define LOOPREPEAT 50000  /* This is the default loop count for timing. */
175    
176  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define DBUFFER_SIZE BUFFER_SIZE  
177    
178    /* Static variables */
179    
180  static FILE *outfile;  static FILE *outfile;
181  static int log_store = 0;  static int log_store = 0;
# Line 47  static int callout_count; Line 183  static int callout_count;
183  static int callout_extra;  static int callout_extra;
184  static int callout_fail_count;  static int callout_fail_count;
185  static int callout_fail_id;  static int callout_fail_id;
186    static int debug_lengths;
187  static int first_callout;  static int first_callout;
188    static int locale_set = 0;
189  static int show_malloc;  static int show_malloc;
190  static int use_utf8;  static int use_utf8;
191  static size_t gotten_store;  static size_t gotten_store;
192    
193    /* The buffers grow automatically if very long input lines are encountered. */
194    
195  static const int utf8_table1[] = {  static int buffer_size = 50000;
196    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  static uschar *buffer = NULL;
197    static uschar *dbuffer = NULL;
198    static uschar *pbuffer = NULL;
199    
 static const int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
200    
201  static const int utf8_table3[] = {  /*************************************************
202    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  *         Alternate character tables             *
203    *************************************************/
204    
205    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
206    using the default tables of the library. However, the T option can be used to
207    select alternate sets of tables, for different kinds of testing. Note also that
208    the L (locale) option also adjusts the tables. */
209    
210    /* This is the set of tables distributed as default with PCRE. It recognizes
211    only ASCII characters. */
212    
213    static const unsigned char tables0[] = {
214    
215    /* This table is a lower casing table. */
216    
217        0,  1,  2,  3,  4,  5,  6,  7,
218        8,  9, 10, 11, 12, 13, 14, 15,
219       16, 17, 18, 19, 20, 21, 22, 23,
220       24, 25, 26, 27, 28, 29, 30, 31,
221       32, 33, 34, 35, 36, 37, 38, 39,
222       40, 41, 42, 43, 44, 45, 46, 47,
223       48, 49, 50, 51, 52, 53, 54, 55,
224       56, 57, 58, 59, 60, 61, 62, 63,
225       64, 97, 98, 99,100,101,102,103,
226      104,105,106,107,108,109,110,111,
227      112,113,114,115,116,117,118,119,
228      120,121,122, 91, 92, 93, 94, 95,
229       96, 97, 98, 99,100,101,102,103,
230      104,105,106,107,108,109,110,111,
231      112,113,114,115,116,117,118,119,
232      120,121,122,123,124,125,126,127,
233      128,129,130,131,132,133,134,135,
234      136,137,138,139,140,141,142,143,
235      144,145,146,147,148,149,150,151,
236      152,153,154,155,156,157,158,159,
237      160,161,162,163,164,165,166,167,
238      168,169,170,171,172,173,174,175,
239      176,177,178,179,180,181,182,183,
240      184,185,186,187,188,189,190,191,
241      192,193,194,195,196,197,198,199,
242      200,201,202,203,204,205,206,207,
243      208,209,210,211,212,213,214,215,
244      216,217,218,219,220,221,222,223,
245      224,225,226,227,228,229,230,231,
246      232,233,234,235,236,237,238,239,
247      240,241,242,243,244,245,246,247,
248      248,249,250,251,252,253,254,255,
249    
250    /* This table is a case flipping table. */
251    
252        0,  1,  2,  3,  4,  5,  6,  7,
253        8,  9, 10, 11, 12, 13, 14, 15,
254       16, 17, 18, 19, 20, 21, 22, 23,
255       24, 25, 26, 27, 28, 29, 30, 31,
256       32, 33, 34, 35, 36, 37, 38, 39,
257       40, 41, 42, 43, 44, 45, 46, 47,
258       48, 49, 50, 51, 52, 53, 54, 55,
259       56, 57, 58, 59, 60, 61, 62, 63,
260       64, 97, 98, 99,100,101,102,103,
261      104,105,106,107,108,109,110,111,
262      112,113,114,115,116,117,118,119,
263      120,121,122, 91, 92, 93, 94, 95,
264       96, 65, 66, 67, 68, 69, 70, 71,
265       72, 73, 74, 75, 76, 77, 78, 79,
266       80, 81, 82, 83, 84, 85, 86, 87,
267       88, 89, 90,123,124,125,126,127,
268      128,129,130,131,132,133,134,135,
269      136,137,138,139,140,141,142,143,
270      144,145,146,147,148,149,150,151,
271      152,153,154,155,156,157,158,159,
272      160,161,162,163,164,165,166,167,
273      168,169,170,171,172,173,174,175,
274      176,177,178,179,180,181,182,183,
275      184,185,186,187,188,189,190,191,
276      192,193,194,195,196,197,198,199,
277      200,201,202,203,204,205,206,207,
278      208,209,210,211,212,213,214,215,
279      216,217,218,219,220,221,222,223,
280      224,225,226,227,228,229,230,231,
281      232,233,234,235,236,237,238,239,
282      240,241,242,243,244,245,246,247,
283      248,249,250,251,252,253,254,255,
284    
285    /* This table contains bit maps for various character classes. Each map is 32
286    bytes long and the bits run from the least significant end of each byte. The
287    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
288    graph, print, punct, and cntrl. Other classes are built from combinations. */
289    
290      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
291      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
292      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294    
295      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
296      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
299    
300      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
301      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
304    
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
307      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
308      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
309    
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
312      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
313      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
314    
315      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
316      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
317      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
318      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
319    
320      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
321      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
322      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324    
325      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
326      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
327      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329    
330      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
331      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
332      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334    
335      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
336      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
337      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339    
340    /* This table identifies various classes of character by individual bits:
341      0x01   white space character
342      0x02   letter
343      0x04   decimal digit
344      0x08   hexadecimal digit
345      0x10   alphanumeric or '_'
346      0x80   regular expression metacharacter or binary zero
347    */
348    
349      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
350      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
351      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
352      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
353      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
354      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
355      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
356      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
357      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
358      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
359      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
360      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
361      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
362      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
363      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
364      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
373      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
374      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
375      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
376      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
377      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
378      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
379      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
380      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
381    
382    /* This is a set of tables that came originally from a Windows user. It seems to
383    be at least an approximation of ISO 8859. In particular, there are characters
384    greater than 128 that are marked as spaces, letters, etc. */
385    
386    static const unsigned char tables1[] = {
387    0,1,2,3,4,5,6,7,
388    8,9,10,11,12,13,14,15,
389    16,17,18,19,20,21,22,23,
390    24,25,26,27,28,29,30,31,
391    32,33,34,35,36,37,38,39,
392    40,41,42,43,44,45,46,47,
393    48,49,50,51,52,53,54,55,
394    56,57,58,59,60,61,62,63,
395    64,97,98,99,100,101,102,103,
396    104,105,106,107,108,109,110,111,
397    112,113,114,115,116,117,118,119,
398    120,121,122,91,92,93,94,95,
399    96,97,98,99,100,101,102,103,
400    104,105,106,107,108,109,110,111,
401    112,113,114,115,116,117,118,119,
402    120,121,122,123,124,125,126,127,
403    128,129,130,131,132,133,134,135,
404    136,137,138,139,140,141,142,143,
405    144,145,146,147,148,149,150,151,
406    152,153,154,155,156,157,158,159,
407    160,161,162,163,164,165,166,167,
408    168,169,170,171,172,173,174,175,
409    176,177,178,179,180,181,182,183,
410    184,185,186,187,188,189,190,191,
411    224,225,226,227,228,229,230,231,
412    232,233,234,235,236,237,238,239,
413    240,241,242,243,244,245,246,215,
414    248,249,250,251,252,253,254,223,
415    224,225,226,227,228,229,230,231,
416    232,233,234,235,236,237,238,239,
417    240,241,242,243,244,245,246,247,
418    248,249,250,251,252,253,254,255,
419    0,1,2,3,4,5,6,7,
420    8,9,10,11,12,13,14,15,
421    16,17,18,19,20,21,22,23,
422    24,25,26,27,28,29,30,31,
423    32,33,34,35,36,37,38,39,
424    40,41,42,43,44,45,46,47,
425    48,49,50,51,52,53,54,55,
426    56,57,58,59,60,61,62,63,
427    64,97,98,99,100,101,102,103,
428    104,105,106,107,108,109,110,111,
429    112,113,114,115,116,117,118,119,
430    120,121,122,91,92,93,94,95,
431    96,65,66,67,68,69,70,71,
432    72,73,74,75,76,77,78,79,
433    80,81,82,83,84,85,86,87,
434    88,89,90,123,124,125,126,127,
435    128,129,130,131,132,133,134,135,
436    136,137,138,139,140,141,142,143,
437    144,145,146,147,148,149,150,151,
438    152,153,154,155,156,157,158,159,
439    160,161,162,163,164,165,166,167,
440    168,169,170,171,172,173,174,175,
441    176,177,178,179,180,181,182,183,
442    184,185,186,187,188,189,190,191,
443    224,225,226,227,228,229,230,231,
444    232,233,234,235,236,237,238,239,
445    240,241,242,243,244,245,246,215,
446    248,249,250,251,252,253,254,223,
447    192,193,194,195,196,197,198,199,
448    200,201,202,203,204,205,206,207,
449    208,209,210,211,212,213,214,247,
450    216,217,218,219,220,221,222,255,
451    0,62,0,0,1,0,0,0,
452    0,0,0,0,0,0,0,0,
453    32,0,0,0,1,0,0,0,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,255,3,
456    126,0,0,0,126,0,0,0,
457    0,0,0,0,0,0,0,0,
458    0,0,0,0,0,0,0,0,
459    0,0,0,0,0,0,255,3,
460    0,0,0,0,0,0,0,0,
461    0,0,0,0,0,0,12,2,
462    0,0,0,0,0,0,0,0,
463    0,0,0,0,0,0,0,0,
464    254,255,255,7,0,0,0,0,
465    0,0,0,0,0,0,0,0,
466    255,255,127,127,0,0,0,0,
467    0,0,0,0,0,0,0,0,
468    0,0,0,0,254,255,255,7,
469    0,0,0,0,0,4,32,4,
470    0,0,0,128,255,255,127,255,
471    0,0,0,0,0,0,255,3,
472    254,255,255,135,254,255,255,7,
473    0,0,0,0,0,4,44,6,
474    255,255,127,255,255,255,127,255,
475    0,0,0,0,254,255,255,255,
476    255,255,255,255,255,255,255,127,
477    0,0,0,0,254,255,255,255,
478    255,255,255,255,255,255,255,255,
479    0,2,0,0,255,255,255,255,
480    255,255,255,255,255,255,255,127,
481    0,0,0,0,255,255,255,255,
482    255,255,255,255,255,255,255,255,
483    0,0,0,0,254,255,0,252,
484    1,0,0,248,1,0,0,120,
485    0,0,0,0,254,255,255,255,
486    0,0,128,0,0,0,128,0,
487    255,255,255,255,0,0,0,0,
488    0,0,0,0,0,0,0,128,
489    255,255,255,255,0,0,0,0,
490    0,0,0,0,0,0,0,0,
491    128,0,0,0,0,0,0,0,
492    0,1,1,0,1,1,0,0,
493    0,0,0,0,0,0,0,0,
494    0,0,0,0,0,0,0,0,
495    1,0,0,0,128,0,0,0,
496    128,128,128,128,0,0,128,0,
497    28,28,28,28,28,28,28,28,
498    28,28,0,0,0,0,0,128,
499    0,26,26,26,26,26,26,18,
500    18,18,18,18,18,18,18,18,
501    18,18,18,18,18,18,18,18,
502    18,18,18,128,128,0,128,16,
503    0,26,26,26,26,26,26,18,
504    18,18,18,18,18,18,18,18,
505    18,18,18,18,18,18,18,18,
506    18,18,18,128,128,0,0,0,
507    0,0,0,0,0,1,0,0,
508    0,0,0,0,0,0,0,0,
509    0,0,0,0,0,0,0,0,
510    0,0,0,0,0,0,0,0,
511    1,0,0,0,0,0,0,0,
512    0,0,18,0,0,0,0,0,
513    0,0,20,20,0,18,0,0,
514    0,20,18,0,0,0,0,0,
515    18,18,18,18,18,18,18,18,
516    18,18,18,18,18,18,18,18,
517    18,18,18,18,18,18,18,0,
518    18,18,18,18,18,18,18,18,
519    18,18,18,18,18,18,18,18,
520    18,18,18,18,18,18,18,18,
521    18,18,18,18,18,18,18,0,
522    18,18,18,18,18,18,18,18
523    };
524    
525    
526    
527    
528    #ifndef HAVE_STRERROR
529  /*************************************************  /*************************************************
530  *         Print compiled regex                   *  *     Provide strerror() for non-ANSI libraries  *
531  *************************************************/  *************************************************/
532    
533  /* The code for doing this is held in a separate file that is also included in  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
534  pcre.c when it is compiled with the debug switch. It defines a function called  in their libraries, but can provide the same facility by this simple
535  print_internals(), which uses a table of opcode lengths defined by the macro  alternative function. */
536  OP_LENGTHS, whose name must be OP_lengths. */  
extern int   sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the error number up in the system's
sys_errlist[] table, which holds sys_nerr entries. Any number outside
0 .. sys_nerr-1 yields a fixed "unknown" message instead of indexing the
table. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
546    #endif /* HAVE_STRERROR */
547    
548    
549    
550    
551    /*************************************************
552    *        Read or extend an input line            *
553    *************************************************/
554    
555    /* Input lines are read into buffer, but both patterns and data lines can be
556    continued over multiple input lines. In addition, if the buffer fills up, we
557    want to automatically expand it so as to be able to handle extremely large
558    lines that are needed for certain stress tests. When the input buffer is
559    expanded, the other two buffers must also be expanded likewise, and the
560    contents of pbuffer, which are a copy of the input for callouts, must be
561    preserved (for when expansion happens for a data line). This is not the most
562    optimal way of handling this, but hey, this is just a test program!
563    
564    Arguments:
565      f            the file to read
566      start        where in buffer to start (this *must* be within buffer)
567      prompt       for stdin or readline()
568    
569    Returns:       pointer to the start of new data
570                   could be a copy of start, or could be moved
571                   NULL if no data read and EOF reached
572    */
573    
574    static uschar *
575    extend_inputline(FILE *f, uschar *start, const char *prompt)
576    {
577    uschar *here = start;
578    
579    for (;;)
580      {
581      int rlen = (int)(buffer_size - (here - buffer));
582    
583      if (rlen > 1000)
584        {
585        int dlen;
586    
587        /* If libreadline support is required, use readline() to read a line if the
588        input is a terminal. Note that readline() removes the trailing newline, so
589        we must put it back again, to be compatible with fgets(). */
590    
591    #ifdef SUPPORT_LIBREADLINE
592        if (isatty(fileno(f)))
593          {
594          size_t len;
595          char *s = readline(prompt);
596          if (s == NULL) return (here == start)? NULL : start;
597          len = strlen(s);
598          if (len > 0) add_history(s);
599          if (len > rlen - 1) len = rlen - 1;
600          memcpy(here, s, len);
601          here[len] = '\n';
602          here[len+1] = 0;
603          free(s);
604          }
605        else
606    #endif
607    
608        /* Read the next line by normal means, prompting if the file is stdin. */
609    
610          {
611          if (f == stdin) printf("%s", prompt);
612          if (fgets((char *)here, rlen,  f) == NULL)
613            return (here == start)? NULL : start;
614          }
615    
616        dlen = (int)strlen((char *)here);
617        if (dlen > 0 && here[dlen - 1] == '\n') return start;
618        here += dlen;
619        }
620    
621      else
622        {
623        int new_buffer_size = 2*buffer_size;
624        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
625        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
626        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
627    
628        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
629          {
630          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
631          exit(1);
632          }
633    
634        memcpy(new_buffer, buffer, buffer_size);
635        memcpy(new_pbuffer, pbuffer, buffer_size);
636    
637        buffer_size = new_buffer_size;
638    
639        start = new_buffer + (start - buffer);
640        here = new_buffer + (here - buffer);
641    
642        free(buffer);
643        free(dbuffer);
644        free(pbuffer);
645    
646        buffer = new_buffer;
647        dbuffer = new_dbuffer;
648        pbuffer = new_pbuffer;
649        }
650      }
651    
652    return NULL;  /* Control never gets here */
653    }
654    
655    
656    
 static uschar OP_lengths[] = { OP_LENGTHS };  
657    
 #include "printint.c"  
658    
659    
660    
# Line 85  static uschar OP_lengths[] = { OP_LENGTH Line 664  static uschar OP_lengths[] = { OP_LENGTH
664    
665  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
666  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
667  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
668    
669  Arguments:  Arguments:
670    str           string to be converted    str           string to be converted
# Line 106  return(result); Line 685  return(result);
685    
686    
687    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
688    
689  /*************************************************  /*************************************************
690  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 151  return i + 1; Line 694  return i + 1;
694  and returns the value of the character.  and returns the value of the character.
695    
696  Argument:  Argument:
697    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
698    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
699    
700  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
701             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
702  */  */
703    
704    #if !defined NOUTF8
705    
706  static int  static int
707  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
708  {  {
709  int c = *buffer++;  int c = *utf8bytes++;
710  int d = c;  int d = c;
711  int i, j, s;  int i, j, s;
712    
# Line 181  d = (c & utf8_table3[i]) << s; Line 726  d = (c & utf8_table3[i]) << s;
726    
727  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
728    {    {
729    c = *buffer++;    c = *utf8bytes++;
730    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
731    s -= 6;    s -= 6;
732    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 189  for (j = 0; j < i; j++) Line 734  for (j = 0; j < i; j++)
734    
735  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
736    
737  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
738    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
739  if (j != i) return -(i+1);  if (j != i) return -(i+1);
740    
# Line 199  if (j != i) return -(i+1); Line 744  if (j != i) return -(i+1);
744  return i+1;  return i+1;
745  }  }
746    
747    #endif
748    
749    
750    
751    /*************************************************
752    *       Convert character value to UTF-8         *
753    *************************************************/
754    
755    /* This function takes an integer value in the range 0 - 0x7fffffff
756    and encodes it as a UTF-8 character in 1 to 6 bytes.
757    
758    Arguments:
759      cvalue     the character value
760      utf8bytes  pointer to buffer for result - at least 6 bytes long
761    
762    Returns:     number of bytes placed in the buffer
763    */
764    
765    #if !defined NOUTF8
766    
767    static int
768    ord2utf8(int cvalue, uschar *utf8bytes)
769    {
770    register int i, j;
771    for (i = 0; i < utf8_table1_size; i++)
772      if (cvalue <= utf8_table1[i]) break;
773    utf8bytes += i;
774    for (j = i; j > 0; j--)
775     {
776     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
777     cvalue >>= 6;
778     }
779    *utf8bytes = utf8_table2[i] | cvalue;
780    return i + 1;
781    }
782    
783    #endif
784    
785    
786    
787  /*************************************************  /*************************************************
# Line 211  chars without printing. */ Line 794  chars without printing. */
794    
795  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
796  {  {
797  int c;  int c = 0;
798  int yield = 0;  int yield = 0;
799    
800  while (length-- > 0)  while (length-- > 0)
801    {    {
802    #if !defined NOUTF8
803    if (use_utf8)    if (use_utf8)
804      {      {
805      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 224  while (length-- > 0) Line 808  while (length-- > 0)
808        {        {
809        length -= rc - 1;        length -= rc - 1;
810        p += rc;        p += rc;
811        if (c < 256 && isprint(c))        if (PRINTHEX(c))
812          {          {
813          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
814          yield++;          yield++;
815          }          }
816        else        else
817          {          {
818          int n;          int n = 4;
819          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
820          yield += n;          yield += (n <= 0x000000ff)? 2 :
821                     (n <= 0x00000fff)? 3 :
822                     (n <= 0x0000ffff)? 4 :
823                     (n <= 0x000fffff)? 5 : 6;
824          }          }
825        continue;        continue;
826        }        }
827      }      }
828    #endif
829    
830     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
831    
832    if (isprint(c = *(p++)))    c = *p++;
833      if (PRINTHEX(c))
834      {      {
835      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
836      yield++;      yield++;
# Line 269  data is not zero. */ Line 858  data is not zero. */
858  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
859  {  {
860  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
861  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
862    
863  if (callout_extra)  if (callout_extra)
864    {    {
# Line 300  pre_start = pchars((unsigned char *)cb-> Line 889  pre_start = pchars((unsigned char *)cb->
889  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
890    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
891    
892    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
893    
894  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
895    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
896    
897  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
898    
899  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
900  shown */  shown. For automatic callouts, show the pattern offset. */
901    
902  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
903    else fprintf(outfile, "%3d ", cb->callout_number);    {
904      fprintf(outfile, "%+3d ", cb->pattern_position);
905      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
906      }
907    else
908      {
909      if (callout_extra) fprintf(outfile, "    ");
910        else fprintf(outfile, "%3d ", cb->callout_number);
911      }
912    
913  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
914  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 320  if (post_start > 0) Line 919  if (post_start > 0)
919    fprintf(outfile, "^");    fprintf(outfile, "^");
920    }    }
921    
922    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
923      fprintf(outfile, " ");
924    
925    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
926      pbuffer + cb->pattern_position);
927    
928  fprintf(outfile, "\n");  fprintf(outfile, "\n");
929  first_callout = 0;  first_callout = 0;
930    
# Line 350  static void *new_malloc(size_t size) Line 955  static void *new_malloc(size_t size)
955  void *block = malloc(size);  void *block = malloc(size);
956  gotten_store = size;  gotten_store = size;
957  if (show_malloc)  if (show_malloc)
958    fprintf(outfile, "malloc       %3d %p\n", size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
959  return block;  return block;
960  }  }
961    
# Line 368  static void *stack_malloc(size_t size) Line 973  static void *stack_malloc(size_t size)
973  {  {
974  void *block = malloc(size);  void *block = malloc(size);
975  if (show_malloc)  if (show_malloc)
976    fprintf(outfile, "stack_malloc %3d %p\n", size, block);    fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
977  return block;  return block;
978  }  }
979    
# Line 396  if ((rc = pcre_fullinfo(re, study, optio Line 1001  if ((rc = pcre_fullinfo(re, study, optio
1001    
1002    
1003  /*************************************************  /*************************************************
1004    *         Byte flipping function                 *
1005    *************************************************/
1006    
/* Reverse the order of the low-order bytes of a value.

Arguments:
  value     the value whose bytes are to be reversed
  n         2 => swap the two low-order bytes;
            anything else => reverse the four low-order bytes

Returns:    the byte-reversed value; bits above those handled are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int byte0 = value & 0xff;
unsigned long int byte1 = (value >> 8) & 0xff;
unsigned long int byte2, byte3;

if (n == 2) return (byte0 << 8) | byte1;

byte2 = (value >> 16) & 0xff;
byte3 = (value >> 24) & 0xff;
return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
1016    
1017    
1018    
1019    
1020    /*************************************************
1021    *        Check match or recursion limit          *
1022    *************************************************/
1023    
1024    static int
1025    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1026      int start_offset, int options, int *use_offsets, int use_size_offsets,
1027      int flag, unsigned long int *limit, int errnumber, const char *msg)
1028    {
1029    int count;
1030    int min = 0;
1031    int mid = 64;
1032    int max = -1;
1033    
1034    extra->flags |= flag;
1035    
1036    for (;;)
1037      {
1038      *limit = mid;
1039    
1040      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1041        use_offsets, use_size_offsets);
1042    
1043      if (count == errnumber)
1044        {
1045        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1046        min = mid;
1047        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1048        }
1049    
1050      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1051                             count == PCRE_ERROR_PARTIAL)
1052        {
1053        if (mid == min + 1)
1054          {
1055          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1056          break;
1057          }
1058        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1059        max = mid;
1060        mid = (min + mid)/2;
1061        }
1062      else break;    /* Some other error */
1063      }
1064    
1065    extra->flags &= ~flag;
1066    return count;
1067    }
1068    
1069    
1070    
1071    /*************************************************
1072    *         Case-independent strncmp() function    *
1073    *************************************************/
1074    
1075    /*
1076    Arguments:
1077      s         first string
1078      t         second string
1079      n         number of characters to compare
1080    
1081    Returns:    < 0, = 0, or > 0, according to the comparison
1082    */
1083    
1084    static int
1085    strncmpic(uschar *s, uschar *t, int n)
1086    {
1087    while (n--)
1088      {
1089      int c = tolower(*s++) - tolower(*t++);
1090      if (c) return c;
1091      }
1092    return 0;
1093    }
1094    
1095    
1096    
1097    /*************************************************
1098    *         Check newline indicator                *
1099    *************************************************/
1100    
1101    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1102    a message and return 0 if there is no match.
1103    
1104    Arguments:
1105      p           points after the leading '<'
1106      f           file for error message
1107    
1108    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1109    */
1110    
1111    static int
1112    check_newline(uschar *p, FILE *f)
1113    {
1114    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1115    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1116    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1117    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1118    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1119    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1120    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1121    fprintf(f, "Unknown newline type at: <%s\n", p);
1122    return 0;
1123    }
1124    
1125    
1126    
1127    /*************************************************
1128    *             Usage function                     *
1129    *************************************************/
1130    
/* Write the command line summary and the list of options to stdout. Lines
that describe optional features are selected by the same macros that control
whether those features are compiled in. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
1164    
1165    
1166    
1167    /*************************************************
1168  *                Main Program                    *  *                Main Program                    *
1169  *************************************************/  *************************************************/
1170    
# Line 408  int main(int argc, char **argv) Line 1177  int main(int argc, char **argv)
1177  FILE *infile = stdin;  FILE *infile = stdin;
1178  int options = 0;  int options = 0;
1179  int study_options = 0;  int study_options = 0;
1180    int default_find_match_limit = FALSE;
1181  int op = 1;  int op = 1;
1182  int timeit = 0;  int timeit = 0;
1183    int timeitm = 0;
1184  int showinfo = 0;  int showinfo = 0;
1185  int showstore = 0;  int showstore = 0;
1186    int quiet = 0;
1187  int size_offsets = 45;  int size_offsets = 45;
1188  int size_offsets_max;  int size_offsets_max;
1189  int *offsets;  int *offsets = NULL;
1190  #if !defined NOPOSIX  #if !defined NOPOSIX
1191  int posix = 0;  int posix = 0;
1192  #endif  #endif
1193  int debug = 0;  int debug = 0;
1194  int done = 0;  int done = 0;
1195    int all_use_dfa = 0;
1196    int yield = 0;
1197    int stack_size;
1198    
1199    /* These vectors store, end-to-end, a list of captured substring names. Assume
1200    that 1024 is plenty long enough for the few names we'll be testing. */
1201    
1202  unsigned char *buffer;  uschar copynames[1024];
1203  unsigned char *dbuffer;  uschar getnames[1024];
1204    
1205    uschar *copynamesptr;
1206    uschar *getnamesptr;
1207    
1208  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
1209  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
1210    
1211  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
1212  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
1213    pbuffer = (unsigned char *)malloc(buffer_size);
1214    
1215  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
1216    
1217  outfile = stdout;  outfile = stdout;
1218    
1219    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1220    library to translate CRLF into a single LF character. At least, that's what
1221    I've been told: never having used Windows I take this all on trust. Originally
1222    it set 0x8000, but then I was advised that _O_BINARY was better. */
1223    
1224    #if defined(_WIN32) || defined(WIN32)
1225    _setmode( _fileno( stdout ), _O_BINARY );
1226    #endif
1227    
1228  /* Scan options */  /* Scan options */
1229    
1230  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 442  while (argc > 1 && argv[op][0] == '-') Line 1233  while (argc > 1 && argv[op][0] == '-')
1233    
1234    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1235      showstore = 1;      showstore = 1;
1236    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1237      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1238    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1239    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1240      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1241    #if !defined NODFA
1242      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1243    #endif
1244    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1245        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1246          *endptr == 0))          *endptr == 0))
# Line 452  while (argc > 1 && argv[op][0] == '-') Line 1248  while (argc > 1 && argv[op][0] == '-')
1248      op++;      op++;
1249      argc--;      argc--;
1250      }      }
1251      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1252        {
1253        int both = argv[op][2] == 0;
1254        int temp;
1255        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1256                         *endptr == 0))
1257          {
1258          timeitm = temp;
1259          op++;
1260          argc--;
1261          }
1262        else timeitm = LOOPREPEAT;
1263        if (both) timeit = timeitm;
1264        }
1265      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1266          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1267            *endptr == 0))
1268        {
1269    #if defined(_WIN32) || defined(WIN32)
1270        printf("PCRE: -S not supported on this OS\n");
1271        exit(1);
1272    #else
1273        int rc;
1274        struct rlimit rlim;
1275        getrlimit(RLIMIT_STACK, &rlim);
1276        rlim.rlim_cur = stack_size * 1024 * 1024;
1277        rc = setrlimit(RLIMIT_STACK, &rlim);
1278        if (rc != 0)
1279          {
1280        printf("PCRE: setrlimit() failed with error %d\n", rc);
1281        exit(1);
1282          }
1283        op++;
1284        argc--;
1285    #endif
1286        }
1287  #if !defined NOPOSIX  #if !defined NOPOSIX
1288    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1289  #endif  #endif
1290    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1291      {      {
1292      int rc;      int rc;
1293        unsigned long int lrc;
1294      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1295      printf("Compiled with\n");      printf("Compiled with\n");
1296      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1297      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1298        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1299        printf("  %sUnicode properties support\n", rc? "" : "No ");
1300      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1301      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      /* Note that these values are always the ASCII values, even
1302        in EBCDIC environments. CR is 13 and NL is 10. */
1303        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1304          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1305          (rc == -2)? "ANYCRLF" :
1306          (rc == -1)? "ANY" : "???");
1307        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1308        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1309                                         "all Unicode newlines");
1310      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1311      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1312      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1313      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1314      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1315      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1316        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1317        printf("  Default recursion depth limit = %ld\n", lrc);
1318      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1319      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1320      exit(0);      goto EXIT;
1321        }
1322      else if (strcmp(argv[op], "-help") == 0 ||
1323               strcmp(argv[op], "--help") == 0)
1324        {
1325        usage();
1326        goto EXIT;
1327      }      }
1328    else    else
1329      {      {
1330      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1331      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1332      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
1333      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
1334      }      }
1335    op++;    op++;
1336    argc--;    argc--;
# Line 500  offsets = (int *)malloc(size_offsets_max Line 1343  offsets = (int *)malloc(size_offsets_max
1343  if (offsets == NULL)  if (offsets == NULL)
1344    {    {
1345    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1346      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1347    return 1;    yield = 1;
1348      goto EXIT;
1349    }    }
1350    
1351  /* Sort out the input and output files */  /* Sort out the input and output files */
1352    
1353  if (argc > 1)  if (argc > 1)
1354    {    {
1355    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
1356    if (infile == NULL)    if (infile == NULL)
1357      {      {
1358      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1359      return 1;      yield = 1;
1360        goto EXIT;
1361      }      }
1362    }    }
1363    
1364  if (argc > 2)  if (argc > 2)
1365    {    {
1366    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1367    if (outfile == NULL)    if (outfile == NULL)
1368      {      {
1369      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1370      return 1;      yield = 1;
1371        goto EXIT;
1372      }      }
1373    }    }
1374    
# Line 533  pcre_free = new_free; Line 1379  pcre_free = new_free;
1379  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1380  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1381    
1382  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1383    
1384  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1385    
1386  /* Main loop */  /* Main loop */
1387    
# Line 550  while (!done) Line 1396  while (!done)
1396  #endif  #endif
1397    
1398    const char *error;    const char *error;
1399      unsigned char *markptr;
1400    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1401      unsigned char *to_file = NULL;
1402    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1403      unsigned long int true_size, true_study_size = 0;
1404      size_t size, regex_gotten_store;
1405      int do_mark = 0;
1406    int do_study = 0;    int do_study = 0;
1407    int do_debug = debug;    int do_debug = debug;
1408    int do_G = 0;    int do_G = 0;
1409    int do_g = 0;    int do_g = 0;
1410    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1411    int do_showrest = 0;    int do_showrest = 0;
1412    int erroroffset, len, delimiter;    int do_flip = 0;
1413      int erroroffset, len, delimiter, poffset;
1414    
1415    use_utf8 = 0;    use_utf8 = 0;
1416      debug_lengths = 1;
1417    
1418    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1419    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1420    fflush(outfile);    fflush(outfile);
1421    
# Line 571  while (!done) Line 1423  while (!done)
1423    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1424    if (*p == 0) continue;    if (*p == 0) continue;
1425    
1426    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1427    complete, read more. */  
1428      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1429        {
1430        unsigned long int magic, get_options;
1431        uschar sbuf[8];
1432        FILE *f;
1433    
1434        p++;
1435        pp = p + (int)strlen((char *)p);
1436        while (isspace(pp[-1])) pp--;
1437        *pp = 0;
1438    
1439        f = fopen((char *)p, "rb");
1440        if (f == NULL)
1441          {
1442          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1443          continue;
1444          }
1445    
1446        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1447    
1448        true_size =
1449          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1450        true_study_size =
1451          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1452    
1453        re = (real_pcre *)new_malloc(true_size);
1454        regex_gotten_store = gotten_store;
1455    
1456        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1457    
1458        magic = ((real_pcre *)re)->magic_number;
1459        if (magic != MAGIC_NUMBER)
1460          {
1461          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1462            {
1463            do_flip = 1;
1464            }
1465          else
1466            {
1467            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1468            fclose(f);
1469            continue;
1470            }
1471          }
1472    
1473        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1474          do_flip? " (byte-inverted)" : "", p);
1475    
1476        /* Need to know if UTF-8 for printing data strings */
1477    
1478        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1479        use_utf8 = (get_options & PCRE_UTF8) != 0;
1480    
1481        /* Now see if there is any following study data */
1482    
1483        if (true_study_size != 0)
1484          {
1485          pcre_study_data *psd;
1486    
1487          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1488          extra->flags = PCRE_EXTRA_STUDY_DATA;
1489    
1490          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1491          extra->study_data = psd;
1492    
1493          if (fread(psd, 1, true_study_size, f) != true_study_size)
1494            {
1495            FAIL_READ:
1496            fprintf(outfile, "Failed to read data from %s\n", p);
1497            if (extra != NULL) new_free(extra);
1498            if (re != NULL) new_free(re);
1499            fclose(f);
1500            continue;
1501            }
1502          fprintf(outfile, "Study data loaded from %s\n", p);
1503          do_study = 1;     /* To get the data output if requested */
1504          }
1505        else fprintf(outfile, "No study data\n");
1506    
1507        fclose(f);
1508        goto SHOW_INFO;
1509        }
1510    
1511      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1512      the pattern; if it isn't complete, read more. */
1513    
1514    delimiter = *p++;    delimiter = *p++;
1515    
1516    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1517      {      {
1518      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1519      goto SKIP_DATA;      goto SKIP_DATA;
1520      }      }
1521    
1522    pp = p;    pp = p;
1523      poffset = (int)(p - buffer);
1524    
1525    for(;;)    for(;;)
1526      {      {
# Line 593  while (!done) Line 1531  while (!done)
1531        pp++;        pp++;
1532        }        }
1533      if (*pp != 0) break;      if (*pp != 0) break;
1534        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1535        {        {
1536        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1537        done = 1;        done = 1;
# Line 611  while (!done) Line 1540  while (!done)
1540      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1541      }      }
1542    
1543      /* The buffer may have moved while being extended; reset the start of data
1544      pointer to the correct relative point in the buffer. */
1545    
1546      p = buffer + poffset;
1547    
1548    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1549    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1550    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1551    
1552    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1553    
1554    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1555      for callouts. */
1556    
1557    *pp++ = 0;    *pp++ = 0;
1558      strcpy((char *)pbuffer, (char *)p);
1559    
1560    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1561    
# Line 631  while (!done) Line 1567  while (!done)
1567      {      {
1568      switch (*pp++)      switch (*pp++)
1569        {        {
1570          case 'f': options |= PCRE_FIRSTLINE; break;
1571        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1572        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1573        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 639  while (!done) Line 1576  while (!done)
1576    
1577        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1578        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1579          case 'B': do_debug = 1; break;
1580          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1581        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1582        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1583          case 'F': do_flip = 1; break;
1584        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1585        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1586          case 'J': options |= PCRE_DUPNAMES; break;
1587          case 'K': do_mark = 1; break;
1588        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1589        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1590    
# Line 652  while (!done) Line 1594  while (!done)
1594    
1595        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1596        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1597          case 'W': options |= PCRE_UCP; break;
1598        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1599          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1600          case 'Z': debug_lengths = 0; break;
1601        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1602        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1603    
1604          case 'T':
1605          switch (*pp++)
1606            {
1607            case '0': tables = tables0; break;
1608            case '1': tables = tables1; break;
1609    
1610            case '\r':
1611            case '\n':
1612            case ' ':
1613            case 0:
1614            fprintf(outfile, "** Missing table number after /T\n");
1615            goto SKIP_DATA;
1616    
1617            default:
1618            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1619            goto SKIP_DATA;
1620            }
1621          break;
1622    
1623        case 'L':        case 'L':
1624        ppp = pp;        ppp = pp;
1625        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1626          /* The 0 (NUL) test is just in case this is an unterminated line. */
1627          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1628        *ppp = 0;        *ppp = 0;
1629        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1630          {          {
1631          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1632          goto SKIP_DATA;          goto SKIP_DATA;
1633          }          }
1634          locale_set = 1;
1635        tables = pcre_maketables();        tables = pcre_maketables();
1636        pp = ppp;        pp = ppp;
1637        break;        break;
1638    
1639        case '\n': case ' ': break;        case '>':
1640          to_file = pp;
1641          while (*pp != 0) pp++;
1642          while (isspace(pp[-1])) pp--;
1643          *pp = 0;
1644          break;
1645    
1646          case '<':
1647            {
1648            if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1649              {
1650              options |= PCRE_JAVASCRIPT_COMPAT;
1651              pp += 3;
1652              }
1653            else
1654              {
1655              int x = check_newline(pp, outfile);
1656              if (x == 0) goto SKIP_DATA;
1657              options |= x;
1658              while (*pp++ != '>');
1659              }
1660            }
1661          break;
1662    
1663          case '\r':                      /* So that it works in Windows */
1664          case '\n':
1665          case ' ':
1666          break;
1667    
1668        default:        default:
1669        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1670        goto SKIP_DATA;        goto SKIP_DATA;
# Line 685  while (!done) Line 1680  while (!done)
1680      {      {
1681      int rc;      int rc;
1682      int cflags = 0;      int cflags = 0;
1683    
1684      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1685      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1686        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1687        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1688        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1689        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1690        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1691    
1692      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1693    
1694      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 694  while (!done) Line 1696  while (!done)
1696    
1697      if (rc != 0)      if (rc != 0)
1698        {        {
1699        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1700        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1701        goto SKIP_DATA;        goto SKIP_DATA;
1702        }        }
# Line 706  while (!done) Line 1708  while (!done)
1708  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1709    
1710      {      {
1711      if (timeit)      unsigned long int get_options;
1712    
1713        if (timeit > 0)
1714          {
1715          register int i;
1716          clock_t time_taken;
1717          clock_t start_time = clock();
1718          for (i = 0; i < timeit; i++)
1719            {
1720            re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1721            if (re != NULL) free(re);
1722            }
1723          time_taken = clock() - start_time;
1724          fprintf(outfile, "Compile time %.4f milliseconds\n",
1725            (((double)time_taken * 1000.0) / (double)timeit) /
1726              (double)CLOCKS_PER_SEC);
1727          }
1728    
1729        re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1730    
1731        /* Compilation failed; go back for another re, skipping to blank line
1732        if non-interactive. */
1733    
1734        if (re == NULL)
1735          {
1736          fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1737          SKIP_DATA:
1738          if (infile != stdin)
1739            {
1740            for (;;)
1741              {
1742              if (extend_inputline(infile, buffer, NULL) == NULL)
1743                {
1744                done = 1;
1745                goto CONTINUE;
1746                }
1747              len = (int)strlen((char *)buffer);
1748              while (len > 0 && isspace(buffer[len-1])) len--;
1749              if (len == 0) break;
1750              }
1751            fprintf(outfile, "\n");
1752            }
1753          goto CONTINUE;
1754          }
1755    
1756        /* Compilation succeeded. It is now possible to set the UTF-8 option from
1757        within the regex; check for this so that we know how to process the data
1758        lines. */
1759    
1760        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1761        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1762    
1763        /* Print information if required. There are now two info-returning
1764        functions. The old one has a limited interface and returns only limited
1765        data. Check that it agrees with the newer one. */
1766    
1767        if (log_store)
1768          fprintf(outfile, "Memory allocation (code space): %d\n",
1769            (int)(gotten_store -
1770                  sizeof(real_pcre) -
1771                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1772    
1773        /* Extract the size for possible writing before possibly flipping it,
1774        and remember the store that was got. */
1775    
1776        true_size = ((real_pcre *)re)->size;
1777        regex_gotten_store = gotten_store;
1778    
1779        /* If /S was present, study the regexp to generate additional info to
1780        help with the matching. */
1781    
1782        if (do_study)
1783          {
1784          if (timeit > 0)
1785            {
1786            register int i;
1787            clock_t time_taken;
1788            clock_t start_time = clock();
1789            for (i = 0; i < timeit; i++)
1790              extra = pcre_study(re, study_options, &error);
1791            time_taken = clock() - start_time;
1792            if (extra != NULL) free(extra);
1793            fprintf(outfile, "  Study time %.4f milliseconds\n",
1794              (((double)time_taken * 1000.0) / (double)timeit) /
1795                (double)CLOCKS_PER_SEC);
1796            }
1797          extra = pcre_study(re, study_options, &error);
1798          if (error != NULL)
1799            fprintf(outfile, "Failed to study: %s\n", error);
1800          else if (extra != NULL)
1801            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1802          }
1803    
1804        /* If /K was present, we set up for handling MARK data. */
1805    
1806        if (do_mark)
1807        {        {
1808        register int i;        if (extra == NULL)
       clock_t time_taken;  
       clock_t start_time = clock();  
       for (i = 0; i < LOOPREPEAT; i++)  
1809          {          {
1810          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1811          if (re != NULL) free(re);          extra->flags = 0;
1812          }          }
1813        time_taken = clock() - start_time;        extra->mark = &markptr;
1814        fprintf(outfile, "Compile time %.3f milliseconds\n",        extra->flags |= PCRE_EXTRA_MARK;
         (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /  
           (double)CLOCKS_PER_SEC);  
1815        }        }
1816    
1817      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      /* If the 'F' option was present, we flip the bytes of all the integer
1818        fields in the regex data block and the study block. This is to make it
1819      /* Compilation failed; go back for another re, skipping to blank line      possible to test PCRE's handling of byte-flipped patterns, e.g. those
1820      if non-interactive. */      compiled on a different architecture. */
1821    
1822      if (re == NULL)      if (do_flip)
1823        {        {
1824        fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);        real_pcre *rre = (real_pcre *)re;
1825        SKIP_DATA:        rre->magic_number =
1826        if (infile != stdin)          byteflip(rre->magic_number, sizeof(rre->magic_number));
1827          {        rre->size = byteflip(rre->size, sizeof(rre->size));
1828          for (;;)        rre->options = byteflip(rre->options, sizeof(rre->options));
1829            {        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1830            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        rre->top_bracket =
1831              {          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1832              done = 1;        rre->top_backref =
1833              goto CONTINUE;          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1834              }        rre->first_byte =
1835            len = (int)strlen((char *)buffer);          (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1836            while (len > 0 && isspace(buffer[len-1])) len--;        rre->req_byte =
1837            if (len == 0) break;          (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1838            }        rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1839          fprintf(outfile, "\n");          sizeof(rre->name_table_offset));
1840          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1841            sizeof(rre->name_entry_size));
1842          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1843            sizeof(rre->name_count));
1844    
1845          if (extra != NULL)
1846            {
1847            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1848            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1849            rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1850            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1851          }          }
       goto CONTINUE;  
1852        }        }
1853    
1854      /* Compilation succeeded; print data if required. There are now two      /* Extract information from the compiled data if required */
     info-returning functions. The old one has a limited interface and  
     returns only limited data. Check that it agrees with the newer one. */  
1855    
1856      if (log_store)      SHOW_INFO:
1857        fprintf(outfile, "Memory allocation (code space): %d\n",  
1858          (int)(gotten_store -      if (do_debug)
1859                sizeof(real_pcre) -        {
1860                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));        fprintf(outfile, "------------------------------------------------------------------\n");
1861          pcre_printint(re, outfile, debug_lengths);
1862          }
1863    
1864        /* We already have the options in get_options (see above) */
1865    
1866      if (do_showinfo)      if (do_showinfo)
1867        {        {
1868        unsigned long int get_options;        unsigned long int all_options;
1869    #if !defined NOINFOCHECK
1870        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1871        int count, backrefmax, first_char, need_char;  #endif
1872          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1873            hascrorlf;
1874        int nameentrysize, namecount;        int nameentrysize, namecount;
1875        const uschar *nametable;        const uschar *nametable;
       size_t size;  
   
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         print_internals(re, outfile);  
         }  
1876    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1877        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1878        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1879        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 783  while (!done) Line 1882  while (!done)
1882        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1883        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1884        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1885          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1886          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1887          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1888    
1889    #if !defined NOINFOCHECK
1890        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1891        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1892          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 801  while (!done) Line 1904  while (!done)
1904            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1905              get_options, old_options);              get_options, old_options);
1906          }          }
1907    #endif
1908    
1909        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1910          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1911          size, gotten_store);          (int)size, (int)regex_gotten_store);
1912    
1913        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1914        if (backrefmax > 0)        if (backrefmax > 0)
# Line 822  while (!done) Line 1926  while (!done)
1926            }            }
1927          }          }
1928    
1929          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1930          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1931    
1932          all_options = ((real_pcre *)re)->options;
1933          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1934    
1935        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1936          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1937            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1938            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1939            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1940            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1941              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1942            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1943              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1944              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1945            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1946            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1947            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1948              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1949            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1950            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1951              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1952              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1953              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1954    
1955          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1956    
1957          switch (get_options & PCRE_NEWLINE_BITS)
1958            {
1959            case PCRE_NEWLINE_CR:
1960            fprintf(outfile, "Forced newline sequence: CR\n");
1961            break;
1962    
1963        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_LF:
1964          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: LF\n");
1965            break;
1966    
1967            case PCRE_NEWLINE_CRLF:
1968            fprintf(outfile, "Forced newline sequence: CRLF\n");
1969            break;
1970    
1971            case PCRE_NEWLINE_ANYCRLF:
1972            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1973            break;
1974    
1975            case PCRE_NEWLINE_ANY:
1976            fprintf(outfile, "Forced newline sequence: ANY\n");
1977            break;
1978    
1979            default:
1980            break;
1981            }
1982    
1983        if (first_char == -1)        if (first_char == -1)
1984          {          {
1985          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1986          }          }
1987        else if (first_char < 0)        else if (first_char < 0)
1988          {          {
# Line 851  while (!done) Line 1993  while (!done)
1993          int ch = first_char & 255;          int ch = first_char & 255;
1994          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1995            "" : " (caseless)";            "" : " (caseless)";
1996          if (isprint(ch))          if (PRINTHEX(ch))
1997            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1998          else          else
1999            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 866  while (!done) Line 2008  while (!done)
2008          int ch = need_char & 255;          int ch = need_char & 255;
2009          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2010            "" : " (caseless)";            "" : " (caseless)";
2011          if (isprint(ch))          if (PRINTHEX(ch))
2012            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2013          else          else
2014            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2015          }          }
       }  
   
     /* If /S was present, study the regexp to generate additional info to  
     help with the matching. */  
   
     if (do_study)  
       {  
       if (timeit)  
         {  
         register int i;  
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /  
             (double)CLOCKS_PER_SEC);  
         }  
   
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
2016    
2017        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2018        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2019        so messes up the test suite. */        so messes up the test suite. (And with the /F option, it might be
2020          flipped.) */
2021    
2022        else if (do_showinfo)        if (do_study)
2023          {          {
2024          size_t size;          if (extra == NULL)
2025          uschar *start_bits = NULL;            fprintf(outfile, "Study returned NULL\n");
         new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);  
         new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);  
         /* fprintf(outfile, "Study size = %d\n", size); */  
         if (start_bits == NULL)  
           fprintf(outfile, "No starting character set\n");  
2026          else          else
2027            {            {
2028            int i;            uschar *start_bits = NULL;
2029            int c = 24;            int minlength;
2030            fprintf(outfile, "Starting character set: ");  
2031            for (i = 0; i < 256; i++)            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2032              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2033    
2034              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2035              if (start_bits == NULL)
2036                fprintf(outfile, "No set of starting bytes\n");
2037              else
2038              {              {
2039              if ((start_bits[i/8] & (1<<(i%8))) != 0)              int i;
2040                int c = 24;
2041                fprintf(outfile, "Starting byte set: ");
2042                for (i = 0; i < 256; i++)
2043                {                {
2044                if (c > 75)                if ((start_bits[i/8] & (1<<(i&7))) != 0)
                 {  
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
2045                  {                  {
2046                  fprintf(outfile, "%c ", i);                  if (c > 75)
2047                  c += 2;                    {
2048                  }                    fprintf(outfile, "\n  ");
2049                else                    c = 2;
2050                  {                    }
2051                  fprintf(outfile, "\\x%02x ", i);                  if (PRINTHEX(i) && i != ' ')
2052                  c += 5;                    {
2053                      fprintf(outfile, "%c ", i);
2054                      c += 2;
2055                      }
2056                    else
2057                      {
2058                      fprintf(outfile, "\\x%02x ", i);
2059                      c += 5;
2060                      }
2061                  }                  }
2062                }                }
2063                fprintf(outfile, "\n");
2064              }              }
           fprintf(outfile, "\n");  
2065            }            }
2066          }          }
2067        }        }
2068      }  
2069        /* If the '>' option was present, we write out the regex to a file, and
2070        that is all. The first 8 bytes of the file are the regex length and then
2071        the study length, in big-endian order. */
2072    
2073        if (to_file != NULL)
2074          {
2075          FILE *f = fopen((char *)to_file, "wb");
2076          if (f == NULL)
2077            {
2078            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2079            }
2080          else
2081            {
2082            uschar sbuf[8];
2083            sbuf[0] = (uschar)((true_size >> 24) & 255);
2084            sbuf[1] = (uschar)((true_size >> 16) & 255);
2085            sbuf[2] = (uschar)((true_size >>  8) & 255);
2086            sbuf[3] = (uschar)((true_size) & 255);
2087    
2088            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2089            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2090            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2091            sbuf[7] = (uschar)((true_study_size) & 255);
2092    
2093            if (fwrite(sbuf, 1, 8, f) < 8 ||
2094                fwrite(re, 1, true_size, f) < true_size)
2095              {
2096              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2097              }
2098            else
2099              {
2100              fprintf(outfile, "Compiled regex written to %s\n", to_file);
2101              if (extra != NULL)
2102                {
2103                if (fwrite(extra->study_data, 1, true_study_size, f) <
2104                    true_study_size)
2105                  {
2106                  fprintf(outfile, "Write error on %s: %s\n", to_file,
2107                    strerror(errno));
2108                  }
2109                else fprintf(outfile, "Study data written to %s\n", to_file);
2110    
2111                }
2112              }
2113            fclose(f);
2114            }
2115    
2116          new_free(re);
2117          if (extra != NULL) new_free(extra);
2118          if (locale_set)
2119            {
2120            new_free((void *)tables);
2121            setlocale(LC_CTYPE, "C");
2122            locale_set = 0;
2123            }
2124          continue;  /* With next regex */
2125          }
2126        }        /* End of non-POSIX compile */
2127    
2128    /* Read data lines and test them */    /* Read data lines and test them */
2129    
2130    for (;;)    for (;;)
2131      {      {
2132      unsigned char *q;      uschar *q;
2133      unsigned char *bptr = dbuffer;      uschar *bptr;
2134      int *use_offsets = offsets;      int *use_offsets = offsets;
2135      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2136      int callout_data = 0;      int callout_data = 0;
2137      int callout_data_set = 0;      int callout_data_set = 0;
2138      int count, c;      int count, c;
2139      int copystrings = 0;      int copystrings = 0;
2140      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2141      int getstrings = 0;      int getstrings = 0;
2142      int getlist = 0;      int getlist = 0;
2143      int gmatched = 0;      int gmatched = 0;
2144      int start_offset = 0;      int start_offset = 0;
2145        int start_offset_sign = 1;
2146      int g_notempty = 0;      int g_notempty = 0;
2147        int use_dfa = 0;
2148    
2149      options = 0;      options = 0;
2150    
2151        *copynames = 0;
2152        *getnames = 0;
2153    
2154        copynamesptr = copynames;
2155        getnamesptr = getnames;
2156    
2157      pcre_callout = callout;      pcre_callout = callout;
2158      first_callout = 1;      first_callout = 1;
2159      callout_extra = 0;      callout_extra = 0;
# Line 972  while (!done) Line 2162  while (!done)
2162      callout_fail_id = -1;      callout_fail_id = -1;
2163      show_malloc = 0;      show_malloc = 0;
2164    
2165      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
2166      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2167    
2168        len = 0;
2169        for (;;)
2170        {        {
2171        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2172        goto CONTINUE;          {
2173            if (len > 0)    /* Reached EOF without hitting a newline */
2174              {
2175              fprintf(outfile, "\n");
2176              break;
2177              }
2178            done = 1;
2179            goto CONTINUE;
2180            }
2181          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2182          len = (int)strlen((char *)buffer);
2183          if (buffer[len-1] == '\n') break;
2184        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2185    
     len = (int)strlen((char *)buffer);  
2186      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2187      buffer[len] = 0;      buffer[len] = 0;
2188      if (len == 0) break;      if (len == 0) break;
# Line 988  while (!done) Line 2190  while (!done)
2190      p = buffer;      p = buffer;
2191      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2192    
2193      q = dbuffer;      bptr = q = dbuffer;
2194      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2195        {        {
2196        int i = 0;        int i = 0;
# Line 1010  while (!done) Line 2212  while (!done)
2212          c -= '0';          c -= '0';
2213          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2214            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2215    
2216    #if !defined NOUTF8
2217            if (use_utf8 && c > 255)
2218              {
2219              unsigned char buff8[8];
2220              int ii, utn;
2221              utn = ord2utf8(c, buff8);
2222              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2223              c = buff8[ii];   /* Last byte */
2224              }
2225    #endif
2226          break;          break;
2227    
2228          case 'x':          case 'x':
2229    
2230          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
2231    
2232    #if !defined NOUTF8
2233          if (*p == '{')          if (*p == '{')
2234            {            {
2235            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1026  while (!done) Line 2240  while (!done)
2240              {              {
2241              unsigned char buff8[8];              unsigned char buff8[8];
2242              int ii, utn;              int ii, utn;
2243              utn = ord2utf8(c, buff8);              if (use_utf8)
2244              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2245              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2246                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2247                  c = buff8[ii];   /* Last byte */
2248                  }
2249                else
2250                 {
2251                 if (c > 255)
2252                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2253                     "UTF-8 mode is not enabled.\n"
2254                     "** Truncation will probably give the wrong result.\n", c);
2255                 }
2256              p = pt + 1;              p = pt + 1;
2257              break;              break;
2258              }              }
2259            /* Not correct form; fall through */            /* Not correct form; fall through */
2260            }            }
2261    #endif
2262    
2263          /* Ordinary \x */          /* Ordinary \x */
2264    
# Line 1045  while (!done) Line 2270  while (!done)
2270            }            }
2271          break;          break;
2272    
2273          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
2274          p--;          p--;
2275          continue;          continue;
2276    
2277            case '>':
2278            if (*p == '-')
2279              {
2280              start_offset_sign = -1;
2281              p++;
2282              }
2283            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2284            start_offset *= start_offset_sign;
2285            continue;
2286    
2287          case 'A':  /* Option setting */          case 'A':  /* Option setting */
2288          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
2289          continue;          continue;
# Line 1065  while (!done) Line 2300  while (!done)
2300            }            }
2301          else if (isalnum(*p))          else if (isalnum(*p))
2302            {            {
2303            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
2304            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2305              *npp++ = 0;
2306            *npp = 0;            *npp = 0;
2307            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
2308            if (n < 0)            if (n < 0)
2309              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2310            else copystrings |= 1 << n;            copynamesptr = npp;
2311            }            }
2312          else if (*p == '+')          else if (*p == '+')
2313            {            {
# Line 1110  while (!done) Line 2345  while (!done)
2345            }            }
2346          continue;          continue;
2347    
2348    #if !defined NODFA
2349            case 'D':
2350    #if !defined NOPOSIX
2351            if (posix || do_posix)
2352              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2353            else
2354    #endif
2355              use_dfa = 1;
2356            continue;
2357    #endif
2358    
2359    #if !defined NODFA
2360            case 'F':
2361            options |= PCRE_DFA_SHORTEST;
2362            continue;
2363    #endif
2364    
2365          case 'G':          case 'G':
2366          if (isdigit(*p))          if (isdigit(*p))
2367            {            {
# Line 1118  while (!done) Line 2370  while (!done)
2370            }            }
2371          else if (isalnum(*p))          else if (isalnum(*p))
2372            {            {
2373            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
2374            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2375              *npp++ = 0;
2376            *npp = 0;            *npp = 0;
2377            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
2378            if (n < 0)            if (n < 0)
2379              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2380            else getstrings |= 1 << n;            getnamesptr = npp;
2381            }            }
2382          continue;          continue;
2383    
# Line 1138  while (!done) Line 2390  while (!done)
2390          continue;          continue;
2391    
2392          case 'N':          case 'N':
2393          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2394              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2395            else
2396              options |= PCRE_NOTEMPTY;
2397          continue;          continue;
2398    
2399          case 'O':          case 'O':
# Line 1151  while (!done) Line 2406  while (!done)
2406            if (offsets == NULL)            if (offsets == NULL)
2407              {              {
2408              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2409                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2410              return 1;              yield = 1;
2411                goto EXIT;
2412              }              }
2413            }            }
2414          use_size_offsets = n;          use_size_offsets = n;
2415          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
2416          continue;          continue;
2417    
2418            case 'P':
2419            options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2420              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2421            continue;
2422    
2423            case 'Q':
2424            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2425            if (extra == NULL)
2426              {
2427              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2428              extra->flags = 0;
2429              }
2430            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2431            extra->match_limit_recursion = n;
2432            continue;
2433    
2434            case 'q':
2435            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2436            if (extra == NULL)
2437              {
2438              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2439              extra->flags = 0;
2440              }
2441            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2442            extra->match_limit = n;
2443            continue;
2444    
2445    #if !defined NODFA
2446            case 'R':
2447            options |= PCRE_DFA_RESTART;
2448            continue;
2449    #endif
2450    
2451          case 'S':          case 'S':
2452          show_malloc = 1;          show_malloc = 1;
2453          continue;          continue;
2454    
2455            case 'Y':
2456            options |= PCRE_NO_START_OPTIMIZE;
2457            continue;
2458    
2459          case 'Z':          case 'Z':
2460          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2461          continue;          continue;
# Line 1170  while (!done) Line 2463  while (!done)
2463          case '?':          case '?':
2464          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
2465          continue;          continue;
2466    
2467            case '<':
2468              {
2469              int x = check_newline(p, outfile);
2470              if (x == 0) goto NEXT_DATA;
2471              options |= x;
2472              while (*p++ != '>');
2473              }
2474            continue;
2475          }          }
2476        *q++ = c;        *q++ = c;
2477        }        }
2478      *q = 0;      *q = 0;
2479      len = q - dbuffer;      len = (int)(q - dbuffer);
2480    
2481        /* Move the data to the end of the buffer so that a read over the end of
2482        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2483        we are using the POSIX interface, we must include the terminating zero. */
2484    
2485    #if !defined NOPOSIX
2486        if (posix || do_posix)
2487          {
2488          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2489          bptr += buffer_size - len - 1;
2490          }
2491        else
2492    #endif
2493          {
2494          memmove(bptr + buffer_size - len, bptr, len);
2495          bptr += buffer_size - len;
2496          }
2497    
2498        if ((all_use_dfa || use_dfa) && find_match_limit)
2499          {
2500          printf("**Match limit not relevant for DFA matching: ignored\n");
2501          find_match_limit = 0;
2502          }
2503    
2504      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2505      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
# Line 1189  while (!done) Line 2514  while (!done)
2514          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2515        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2516        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2517          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2518    
2519        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2520    
2521        if (rc != 0)        if (rc != 0)
2522          {          {
2523          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2524          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2525          }          }
2526          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2527                  != 0)
2528            {
2529            fprintf(outfile, "Matched with REG_NOSUB\n");
2530            }
2531        else        else
2532          {          {
2533          size_t i;          size_t i;
# Line 1228  while (!done) Line 2559  while (!done)
2559    
2560      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2561        {        {
2562        if (timeit)        markptr = NULL;
2563    
2564          if (timeitm > 0)
2565          {          {
2566          register int i;          register int i;
2567          clock_t time_taken;          clock_t time_taken;
2568          clock_t start_time = clock();          clock_t start_time = clock();
2569          for (i = 0; i < LOOPREPEAT; i++)  
2570    #if !defined NODFA
2571            if (all_use_dfa || use_dfa)
2572              {
2573              int workspace[1000];
2574              for (i = 0; i < timeitm; i++)
2575                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2576                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2577                  sizeof(workspace)/sizeof(int));
2578              }
2579            else
2580    #endif
2581    
2582            for (i = 0; i < timeitm; i++)
2583            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2584              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2585    
2586          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2587          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2588            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2589              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2590          }          }
2591    
2592        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2593        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2594          for the recursion limit. */
2595    
2596        if (find_match_limit)        if (find_match_limit)
2597          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2598          if (extra == NULL)          if (extra == NULL)
2599            {            {
2600            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2601            extra->flags = 0;            extra->flags = 0;
2602            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
2603    
2604          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
2605              options|g_notempty, use_offsets, use_size_offsets,
2606              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2607              PCRE_ERROR_MATCHLIMIT, "match()");
2608    
2609            count = check_match_limit(re, extra, bptr, len, start_offset,
2610              options|g_notempty, use_offsets, use_size_offsets,
2611              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2612              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2613          }          }
2614    
2615        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1305  while (!done) Line 2631  while (!done)
2631        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2632        value of match_limit. */        value of match_limit. */
2633    
2634        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
2635          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
2636            {
2637            int workspace[1000];
2638            count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2639              options | g_notempty, use_offsets, use_size_offsets, workspace,
2640              sizeof(workspace)/sizeof(int));
2641            if (count == 0)
2642              {
2643              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2644              count = use_size_offsets/2;
2645              }
2646            }
2647    #endif
2648    
2649        if (count == 0)        else
2650          {          {
2651          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2652          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2653            if (count == 0)
2654              {
2655              fprintf(outfile, "Matched, but too many substrings\n");
2656              count = use_size_offsets/3;
2657              }
2658          }          }
2659    
2660        /* Matched */        /* Matched */
2661    
2662        if (count >= 0)        if (count >= 0)
2663          {          {
2664          int i;          int i, maxcount;
2665    
2666    #if !defined NODFA
2667            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2668    #endif
2669              maxcount = use_size_offsets/3;
2670    
2671            /* This is a check against a lunatic return value. */
2672    
2673            if (count > maxcount)
2674              {
2675              fprintf(outfile,
2676                "** PCRE error: returned count %d is too big for offset size %d\n",
2677                count, use_size_offsets);
2678              count = use_size_offsets/3;
2679              if (do_g || do_G)
2680                {
2681                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2682                do_g = do_G = FALSE;        /* Break g/G loop */
2683                }
2684              }
2685    
2686          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2687            {            {
2688            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1342  while (!done) Line 2706  while (!done)
2706              }              }
2707            }            }
2708    
2709            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2710    
2711          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2712            {            {
2713            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2714              {              {
2715              char copybuffer[16];              char copybuffer[256];
2716              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2717                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2718              if (rc < 0)              if (rc < 0)
# Line 1356  while (!done) Line 2722  while (!done)
2722              }              }
2723            }            }
2724    
2725            for (copynamesptr = copynames;
2726                 *copynamesptr != 0;
2727                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2728              {
2729              char copybuffer[256];
2730              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2731                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2732              if (rc < 0)
2733                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2734              else
2735                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2736              }
2737    
2738          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2739            {            {
2740            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1368  while (!done) Line 2747  while (!done)
2747              else              else
2748                {                {
2749                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2750                pcre_free_substring(substring);                pcre_free_substring(substring);
2751                }                }
2752              }              }
2753            }            }
2754    
2755            for (getnamesptr = getnames;
2756                 *getnamesptr != 0;
2757                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2758              {
2759              const char *substring;
2760              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2761                count, (char *)getnamesptr, &substring);
2762              if (rc < 0)
2763                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2764              else
2765                {
2766                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2767                pcre_free_substring(substring);
2768                }
2769              }
2770    
2771          if (getlist)          if (getlist)
2772            {            {
2773            const char **stringlist;            const char **stringlist;
# Line 1393  while (!done) Line 2787  while (!done)
2787            }            }
2788          }          }
2789    
2790          /* There was a partial match */
2791    
2792          else if (count == PCRE_ERROR_PARTIAL)
2793            {
2794            if (markptr == NULL) fprintf(outfile, "Partial match");
2795              else fprintf(outfile, "Partial match, mark=%s", markptr);
2796            if (use_size_offsets > 1)
2797              {
2798              fprintf(outfile, ": ");
2799              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2800                outfile);
2801              }
2802            fprintf(outfile, "\n");
2803            break;  /* Out of the /g loop */
2804            }
2805    
2806        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2807        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2808        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2809        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2810        offset values to achieve this. We won't be at the end of the string -  
2811        that was checked before setting g_notempty. */        Complication arises in the case when the newline convention is "any",
2812          "crlf", or "anycrlf". If the previous match was at the end of a line
2813          terminated by CRLF, an advance of one character just passes the \r,
2814          whereas we should prefer the longer newline sequence, as does the code in
2815          pcre_exec(). Fudge the offset value to achieve this. We check for a
2816          newline setting in the pattern; if none was set, use pcre_config() to
2817          find the default.
2818    
2819          Otherwise, in the case of UTF-8 matching, the advance must be one
2820          character, not one byte. */
2821    
2822        else        else
2823          {          {
2824          if (g_notempty != 0)          if (g_notempty != 0)
2825            {            {
2826            int onechar = 1;            int onechar = 1;
2827              unsigned int obits = ((real_pcre *)re)->options;
2828            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2829            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2830                {
2831                int d;
2832                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2833                /* Note that these values are always the ASCII ones, even in
2834                EBCDIC environments. CR = 13, NL = 10. */
2835                obits = (d == 13)? PCRE_NEWLINE_CR :
2836                        (d == 10)? PCRE_NEWLINE_LF :
2837                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2838                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2839                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2840                }
2841              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2842                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2843                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2844                  &&
2845                  start_offset < len - 1 &&
2846                  bptr[start_offset] == '\r' &&
2847                  bptr[start_offset+1] == '\n')
2848                onechar++;
2849              else if (use_utf8)
2850              {              {
2851              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2852                {                {
2853                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2854                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
2855                }                }
2856              }              }
2857            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
# Line 1422  while (!done) Line 2860  while (!done)
2860            {            {
2861            if (count == PCRE_ERROR_NOMATCH)            if (count == PCRE_ERROR_NOMATCH)
2862              {              {
2863              if (gmatched == 0) fprintf(outfile, "No match\n");              if (gmatched == 0)
2864                  {
2865                  if (markptr == NULL) fprintf(outfile, "No match\n");
2866                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2867                  }
2868              }              }
2869            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2870            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
# Line 1434  while (!done) Line 2876  while (!done)
2876        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2877    
2878        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2879        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2880        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
2881        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2882        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2883        character. */        character. */
2884    
2885        g_notempty = 0;        g_notempty = 0;
2886    
2887        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2888          {          {
2889          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2890          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2891          }          }
2892    
2893        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 1459  while (!done) Line 2902  while (!done)
2902          len -= use_offsets[1];          len -= use_offsets[1];
2903          }          }
2904        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2905    
2906        NEXT_DATA: continue;
2907      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2908    
2909    CONTINUE:    CONTINUE:
# Line 1467  while (!done) Line 2912  while (!done)
2912    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2913  #endif  #endif
2914    
2915    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2916    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2917    if (tables != NULL)    if (locale_set)
2918      {      {
2919      free((void *)tables);      new_free((void *)tables);
2920      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2921        locale_set = 0;
2922      }      }
2923    }    }
2924    
2925  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2926  return 0;  
2927    EXIT:
2928    
2929    if (infile != NULL && infile != stdin) fclose(infile);
2930    if (outfile != NULL && outfile != stdout) fclose(outfile);
2931    
2932    free(buffer);
2933    free(dbuffer);
2934    free(pbuffer);
2935    free(offsets);
2936    
2937    return yield;
2938  }  }
2939    
2940  /* End */  /* End of pcretest.c */

Legend:
Removed from v.73  
changed lines
  Added in v.580

  ViewVC Help
Powered by ViewVC 1.1.5