/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 31 by nigel, Sat Feb 24 21:38:57 2007 UTC revision 914 by zherczeg, Mon Feb 13 06:04:50 2012 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50    #ifdef HAVE_CONFIG_H
51    #include "config.h"
52    #endif
53    
54  #include <ctype.h>  #include <ctype.h>
55  #include <stdio.h>  #include <stdio.h>
56  #include <string.h>  #include <string.h>
57  #include <stdlib.h>  #include <stdlib.h>
58  #include <time.h>  #include <time.h>
59  #include <locale.h>  #include <locale.h>
60    #include <errno.h>
61    
62    #ifdef SUPPORT_LIBREADLINE
63    #ifdef HAVE_UNISTD_H
64    #include <unistd.h>
65    #endif
66    #include <readline/readline.h>
67    #include <readline/history.h>
68    #endif
69    
70    
71    /* A number of things vary for Windows builds. Originally, pcretest opened its
72    input and output without "b"; then I was told that "b" was needed in some
73    environments, so it was added for release 5.0 to both the input and output. (It
74    makes no difference on Unix-like systems.) Later I was told that it is wrong
75    for the input on Windows. I've now abstracted the modes into two macros that
76    are set here, to make it easier to fiddle with them, and removed "b" from the
77    input mode under Windows. */
78    
79    #if defined(_WIN32) || defined(WIN32)
80    #include <io.h>                /* For _setmode() */
81    #include <fcntl.h>             /* For _O_BINARY */
82    #define INPUT_MODE   "r"
83    #define OUTPUT_MODE  "wb"
84    
85    #ifndef isatty
86    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
87    #endif                         /* though in some environments they seem to   */
88                                   /* be already defined, hence the #ifndefs.    */
89    #ifndef fileno
90    #define fileno _fileno
91    #endif
92    
93    /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95    #ifdef __BORLANDC__
96    #define _setmode(handle, mode) setmode(handle, mode)
97    #endif
98    
99    /* Not Windows */
100    
101    #else
102    #include <sys/time.h>          /* These two includes are needed */
103    #include <sys/resource.h>      /* for setrlimit(). */
104    #define INPUT_MODE   "rb"
105    #define OUTPUT_MODE  "wb"
106    #endif
107    
108    #define PRIV(name) name
109    
110  /* Use the internal info for displaying the results of pcre_study(). */  /* We have to include pcre_internal.h because we need the internal info for
111    displaying the results of pcre_study() and we also need to know about the
112    internal macros, structures, and other internal data values; pcretest has
113    "inside information" compared to a program that strictly follows the PCRE API.
114    
115    Although pcre_internal.h does itself include pcre.h, we explicitly include it
116    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
117    appropriately for an application, not for building PCRE. */
118    
119    #include "pcre.h"
120    
121    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
122    /* Configure internal macros to 16 bit mode. */
123    #define COMPILE_PCRE16
124    #endif
125    
126    #include "pcre_internal.h"
127    
128    /* The pcre_printint() function, which prints the internal form of a compiled
129    regex, is held in a separate file so that (a) it can be compiled in either
130    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
131    when that is compiled in debug mode. */
132    
133    #ifdef SUPPORT_PCRE8
134    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
135    #endif
136    #ifdef SUPPORT_PCRE16
137    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
138    #endif
139    
140  #include "internal.h"  /* We need access to some of the data tables that PCRE uses. So as not to have
141    to keep two copies, we include the source file here, changing the names of the
142    external symbols to prevent clashes. */
143    
144    #define PCRE_INCLUDED
145    
146    #include "pcre_tables.c"
147    
148    /* The definition of the macro PRINTABLE, which determines whether to print an
149    output character as-is or as a hex value when showing compiled patterns, is
150    the same as in the printint.src file. We use it here in cases when the locale
151    has not been explicitly changed, so as to get consistent output from systems
152    that differ in their output from isprint() even in the "C" locale. */
153    
154    #ifdef EBCDIC
155    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
156    #else
157    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
158    #endif
159    
160    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
161    
162    /* Posix support is disabled in 16 bit only mode. */
163    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
164    #define NOPOSIX
165    #endif
166    
167    /* It is possible to compile this test program without including support for
168    testing the POSIX interface, though this is not available via the standard
169    Makefile. */
170    
171    #if !defined NOPOSIX
172  #include "pcreposix.h"  #include "pcreposix.h"
173    #endif
174    
175    /* It is also possible, originally for the benefit of a version that was
176    imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
177    NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
178    automatically cut out the UTF support if PCRE is built without it. */
179    
180    #ifndef SUPPORT_UTF
181    #ifndef NOUTF
182    #define NOUTF
183    #endif
184    #endif
185    
186    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
187    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
188    only from one place and is handled differently). I couldn't dream up any way of
189    using a single macro to do this in a generic way, because of the many different
190    argument requirements. We know that at least one of SUPPORT_PCRE8 and
191    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
192    use these in the definitions of generic macros.
193    
194    **** Special note about the PCHARSxxx macros: the address of the string to be
195    printed is always given as two arguments: a base address followed by an offset.
196    The base address is cast to the correct data size for 8 or 16 bit data; the
197    offset is in units of this size. If the string were given as base+offset in one
198    argument, the casting might be incorrectly applied. */
199    
200    #ifdef SUPPORT_PCRE8
201    
202    #define PCHARS8(lv, p, offset, len, f) \
203      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
204    
205    #define PCHARSV8(p, offset, len, f) \
206      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
207    
208    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
209      p = read_capture_name8(p, cn8, re)
210    
211    #define STRLEN8(p) ((int)strlen((char *)p))
212    
213    #define SET_PCRE_CALLOUT8(callout) \
214      pcre_callout = callout
215    
216    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
217       pcre_assign_jit_stack(extra, callback, userdata)
218    
219    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
220      re = pcre_compile((char *)pat, options, error, erroffset, tables)
221    
222    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
223        namesptr, cbuffer, size) \
224      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
225        (char *)namesptr, cbuffer, size)
226    
227    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
228      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
229    
230    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
231        offsets, size_offsets, workspace, size_workspace) \
232      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
233        offsets, size_offsets, workspace, size_workspace)
234    
235    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
236        offsets, size_offsets) \
237      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
238        offsets, size_offsets)
239    
240    #define PCRE_FREE_STUDY8(extra) \
241      pcre_free_study(extra)
242    
243    #define PCRE_FREE_SUBSTRING8(substring) \
244      pcre_free_substring(substring)
245    
246    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
247      pcre_free_substring_list(listptr)
248    
249    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
250        getnamesptr, subsptr) \
251      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
252        (char *)getnamesptr, subsptr)
253    
254    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
255      n = pcre_get_stringnumber(re, (char *)ptr)
256    
257    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
258      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
259    
260    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
261      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
262    
263    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
264      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
265    
266    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
267      pcre_printint(re, outfile, debug_lengths)
268    
269    #define PCRE_STUDY8(extra, re, options, error) \
270      extra = pcre_study(re, options, error)
271    
272    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
273      pcre_jit_stack_alloc(startsize, maxsize)
274    
275    #define PCRE_JIT_STACK_FREE8(stack) \
276      pcre_jit_stack_free(stack)
277    
278    #endif /* SUPPORT_PCRE8 */
279    
280    /* -----------------------------------------------------------*/
281    
282    #ifdef SUPPORT_PCRE16
283    
284    #define PCHARS16(lv, p, offset, len, f) \
285      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
286    
287    #define PCHARSV16(p, offset, len, f) \
288      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
289    
290    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
291      p = read_capture_name16(p, cn16, re)
292    
293    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
294    
295    #define SET_PCRE_CALLOUT16(callout) \
296      pcre16_callout = (int (*)(pcre16_callout_block *))callout
297    
298    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
299      pcre16_assign_jit_stack((pcre16_extra *)extra, \
300        (pcre16_jit_callback)callback, userdata)
301    
302    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
303      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
304        tables)
305    
306    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
307        namesptr, cbuffer, size) \
308      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
309        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
310    
311    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
312      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
313        (PCRE_UCHAR16 *)cbuffer, size/2)
314    
315    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
316        offsets, size_offsets, workspace, size_workspace) \
317      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
318        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
319        workspace, size_workspace)
320    
321    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
322        offsets, size_offsets) \
323      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
324        len, start_offset, options, offsets, size_offsets)
325    
326    #define PCRE_FREE_STUDY16(extra) \
327      pcre16_free_study((pcre16_extra *)extra)
328    
329    #define PCRE_FREE_SUBSTRING16(substring) \
330      pcre16_free_substring((PCRE_SPTR16)substring)
331    
332    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
333      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
334    
335    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
336        getnamesptr, subsptr) \
337      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
338        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
339    
340    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
341      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
342    
343    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
344      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
345        (PCRE_SPTR16 *)(void*)subsptr)
346    
347    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
348      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
349        (PCRE_SPTR16 **)(void*)listptr)
350    
351    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
352      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
353        tables)
354    
355    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
356      pcre16_printint(re, outfile, debug_lengths)
357    
358    #define PCRE_STUDY16(extra, re, options, error) \
359      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
360    
361    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
362      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
363    
364    #define PCRE_JIT_STACK_FREE16(stack) \
365      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
366    
367    #endif /* SUPPORT_PCRE16 */
368    
369    
370    /* ----- Both modes are supported; a runtime test is needed, except for
371    pcre_config(), and the JIT stack functions, when it doesn't matter which
372    version is called. ----- */
373    
374    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
375    
376    #define CHAR_SIZE (use_pcre16? 2:1)
377    
378    #define PCHARS(lv, p, offset, len, f) \
379      if (use_pcre16) \
380        PCHARS16(lv, p, offset, len, f); \
381      else \
382        PCHARS8(lv, p, offset, len, f)
383    
384    #define PCHARSV(p, offset, len, f) \
385      if (use_pcre16) \
386        PCHARSV16(p, offset, len, f); \
387      else \
388        PCHARSV8(p, offset, len, f)
389    
390    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
391      if (use_pcre16) \
392        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
393      else \
394        READ_CAPTURE_NAME8(p, cn8, cn16, re)
395    
396    #define SET_PCRE_CALLOUT(callout) \
397      if (use_pcre16) \
398        SET_PCRE_CALLOUT16(callout); \
399      else \
400        SET_PCRE_CALLOUT8(callout)
401    
402    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
403    
404    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
405      if (use_pcre16) \
406        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
407      else \
408        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
409    
410    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
411      if (use_pcre16) \
412        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
413      else \
414        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
415    
416    #define PCRE_CONFIG pcre_config
417    
418    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
419        namesptr, cbuffer, size) \
420      if (use_pcre16) \
421        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
422          namesptr, cbuffer, size); \
423      else \
424        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
425          namesptr, cbuffer, size)
426    
427    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
428      if (use_pcre16) \
429        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
430      else \
431        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
432    
433    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
434        offsets, size_offsets, workspace, size_workspace) \
435      if (use_pcre16) \
436        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
437          offsets, size_offsets, workspace, size_workspace); \
438      else \
439        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
440          offsets, size_offsets, workspace, size_workspace)
441    
442    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
443        offsets, size_offsets) \
444      if (use_pcre16) \
445        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
446          offsets, size_offsets); \
447      else \
448        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
449          offsets, size_offsets)
450    
451    #define PCRE_FREE_STUDY(extra) \
452      if (use_pcre16) \
453        PCRE_FREE_STUDY16(extra); \
454      else \
455        PCRE_FREE_STUDY8(extra)
456    
457    #define PCRE_FREE_SUBSTRING(substring) \
458      if (use_pcre16) \
459        PCRE_FREE_SUBSTRING16(substring); \
460      else \
461        PCRE_FREE_SUBSTRING8(substring)
462    
463    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
464      if (use_pcre16) \
465        PCRE_FREE_SUBSTRING_LIST16(listptr); \
466      else \
467        PCRE_FREE_SUBSTRING_LIST8(listptr)
468    
469    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
470        getnamesptr, subsptr) \
471      if (use_pcre16) \
472        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
473          getnamesptr, subsptr); \
474      else \
475        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
476          getnamesptr, subsptr)
477    
478    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
479      if (use_pcre16) \
480        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
481      else \
482        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
483    
484    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
485      if (use_pcre16) \
486        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
487      else \
488        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
489    
490    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
491      if (use_pcre16) \
492        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
493      else \
494        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
495    
496    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
497      (use_pcre16 ? \
498         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
499        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
500    
501    #define PCRE_JIT_STACK_FREE(stack) \
502      if (use_pcre16) \
503        PCRE_JIT_STACK_FREE16(stack); \
504      else \
505        PCRE_JIT_STACK_FREE8(stack)
506    
507    #define PCRE_MAKETABLES \
508      (use_pcre16? pcre16_maketables() : pcre_maketables())
509    
510    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
511      if (use_pcre16) \
512        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
513      else \
514        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
515    
516    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
517      if (use_pcre16) \
518        PCRE_PRINTINT16(re, outfile, debug_lengths); \
519      else \
520        PCRE_PRINTINT8(re, outfile, debug_lengths)
521    
522    #define PCRE_STUDY(extra, re, options, error) \
523      if (use_pcre16) \
524        PCRE_STUDY16(extra, re, options, error); \
525      else \
526        PCRE_STUDY8(extra, re, options, error)
527    
528    /* ----- Only 8-bit mode is supported ----- */
529    
530    #elif defined SUPPORT_PCRE8
531    #define CHAR_SIZE                 1
532    #define PCHARS                    PCHARS8
533    #define PCHARSV                   PCHARSV8
534    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
535    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
536    #define STRLEN                    STRLEN8
537    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
538    #define PCRE_COMPILE              PCRE_COMPILE8
539    #define PCRE_CONFIG               pcre_config
540    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
541    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
542    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
543    #define PCRE_EXEC                 PCRE_EXEC8
544    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
545    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
546    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
547    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
548    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
549    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
550    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
551    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
552    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
553    #define PCRE_MAKETABLES           pcre_maketables()
554    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
555    #define PCRE_PRINTINT             PCRE_PRINTINT8
556    #define PCRE_STUDY                PCRE_STUDY8
557    
558    /* ----- Only 16-bit mode is supported ----- */
559    
560    #else
561    #define CHAR_SIZE                 2
562    #define PCHARS                    PCHARS16
563    #define PCHARSV                   PCHARSV16
564    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
565    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
566    #define STRLEN                    STRLEN16
567    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
568    #define PCRE_COMPILE              PCRE_COMPILE16
569    #define PCRE_CONFIG               pcre16_config
570    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
571    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
572    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
573    #define PCRE_EXEC                 PCRE_EXEC16
574    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
575    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
576    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
577    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
578    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
579    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
580    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
581    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
582    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
583    #define PCRE_MAKETABLES           pcre16_maketables()
584    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
585    #define PCRE_PRINTINT             PCRE_PRINTINT16
586    #define PCRE_STUDY                PCRE_STUDY16
587    #endif
588    
589    /* ----- End of mode-specific function call macros ----- */
590    
591    
592    /* Other parameters */
593    
594  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
595  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 599 
599  #endif  #endif
600  #endif  #endif
601    
602  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
603    
604    #define LOOPREPEAT 500000
605    
606    /* Static variables */
607    
608  static FILE *outfile;  static FILE *outfile;
609  static int log_store = 0;  static int log_store = 0;
610    static int callout_count;
611    static int callout_extra;
612    static int callout_fail_count;
613    static int callout_fail_id;
614    static int debug_lengths;
615    static int first_callout;
616    static int locale_set = 0;
617    static int show_malloc;
618    static int use_utf;
619    static size_t gotten_store;
620    static size_t first_gotten_store = 0;
621    static const unsigned char *last_callout_mark = NULL;
622    
623    /* The buffers grow automatically if very long input lines are encountered. */
624    
625    static int buffer_size = 50000;
626    static pcre_uint8 *buffer = NULL;
627    static pcre_uint8 *dbuffer = NULL;
628    static pcre_uint8 *pbuffer = NULL;
629    
630    /* Another buffer is needed for translation to 16-bit character strings. It
631    will be obtained and extended as required. */
632    
633    #ifdef SUPPORT_PCRE16
634    static int buffer16_size = 0;
635    static pcre_uint16 *buffer16 = NULL;
636    
637    #ifdef SUPPORT_PCRE8
638    
639    /* We need the table of operator lengths that is used for 16-bit compiling, in
640    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
641    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
642    appropriately for the 16-bit world. Just as a safety check, make sure that
643    COMPILE_PCRE16 is *not* set. */
644    
645    #ifdef COMPILE_PCRE16
646    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
647    #endif
648    
649    #if LINK_SIZE == 2
650    #undef LINK_SIZE
651    #define LINK_SIZE 1
652    #elif LINK_SIZE == 3 || LINK_SIZE == 4
653    #undef LINK_SIZE
654    #define LINK_SIZE 2
655    #else
656    #error LINK_SIZE must be either 2, 3, or 4
657    #endif
658    
659    #undef IMM2_SIZE
660    #define IMM2_SIZE 1
661    
662    #endif /* SUPPORT_PCRE8 */
663    
664    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
665    #endif  /* SUPPORT_PCRE16 */
666    
667    /* If we have 8-bit support, default use_pcre16 to false; if there is also
668    16-bit support, it can be changed by an option. If there is no 8-bit support,
669    there must be 16-bit support, so default it to 1. */
670    
671    #ifdef SUPPORT_PCRE8
672    static int use_pcre16 = 0;
673    #else
674    static int use_pcre16 = 1;
675    #endif
676    
677  /* Debugging function to print the internal form of the regex. This is the same  /* Textual explanations for runtime error codes */
 code as contained in pcre.c under the DEBUG macro. */  
678    
679  static const char *OP_names[] = {  static const char *errtexts[] = {
680    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    NULL,  /* 0 is no error */
681    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",    NULL,  /* NOMATCH is handled specially */
682    "Opt", "^", "$", "Any", "chars", "not",    "NULL argument passed",
683    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "bad option value",
684    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "magic number missing",
685    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "unknown opcode - pattern overwritten?",
686    "*", "*?", "+", "+?", "?", "??", "{", "{",    "no more memory",
687    "class", "Ref",    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
688    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "match limit exceeded",
689    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "callout error code",
690    "Brazero", "Braminzero", "Bra"    NULL,  /* BADUTF8/16 is handled specially */
691      NULL,  /* BADUTF8/16 offset is handled specially */
692      NULL,  /* PARTIAL is handled specially */
693      "not used - internal error",
694      "internal error - pattern overwritten?",
695      "bad count value",
696      "item unsupported for DFA matching",
697      "backreference condition or recursion test not supported for DFA matching",
698      "match limit not supported for DFA matching",
699      "workspace size exceeded in DFA matching",
700      "too much recursion for DFA matching",
701      "recursion limit exceeded",
702      "not used - internal error",
703      "invalid combination of newline options",
704      "bad offset value",
705      NULL,  /* SHORTUTF8/16 is handled specially */
706      "nested recursion at the same subject position",
707      "JIT stack limit reached",
708      "pattern compiled in wrong mode: 8-bit/16-bit error"
709  };  };
710    
711    
712  static void print_internals(pcre *re, FILE *outfile)  /*************************************************
713    *         Alternate character tables             *
714    *************************************************/
715    
716    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
717    using the default tables of the library. However, the T option can be used to
718    select alternate sets of tables, for different kinds of testing. Note also that
719    the L (locale) option also adjusts the tables. */
720    
721    /* This is the set of tables distributed as default with PCRE. It recognizes
722    only ASCII characters. */
723    
724    static const pcre_uint8 tables0[] = {
725    
726    /* This table is a lower casing table. */
727    
728        0,  1,  2,  3,  4,  5,  6,  7,
729        8,  9, 10, 11, 12, 13, 14, 15,
730       16, 17, 18, 19, 20, 21, 22, 23,
731       24, 25, 26, 27, 28, 29, 30, 31,
732       32, 33, 34, 35, 36, 37, 38, 39,
733       40, 41, 42, 43, 44, 45, 46, 47,
734       48, 49, 50, 51, 52, 53, 54, 55,
735       56, 57, 58, 59, 60, 61, 62, 63,
736       64, 97, 98, 99,100,101,102,103,
737      104,105,106,107,108,109,110,111,
738      112,113,114,115,116,117,118,119,
739      120,121,122, 91, 92, 93, 94, 95,
740       96, 97, 98, 99,100,101,102,103,
741      104,105,106,107,108,109,110,111,
742      112,113,114,115,116,117,118,119,
743      120,121,122,123,124,125,126,127,
744      128,129,130,131,132,133,134,135,
745      136,137,138,139,140,141,142,143,
746      144,145,146,147,148,149,150,151,
747      152,153,154,155,156,157,158,159,
748      160,161,162,163,164,165,166,167,
749      168,169,170,171,172,173,174,175,
750      176,177,178,179,180,181,182,183,
751      184,185,186,187,188,189,190,191,
752      192,193,194,195,196,197,198,199,
753      200,201,202,203,204,205,206,207,
754      208,209,210,211,212,213,214,215,
755      216,217,218,219,220,221,222,223,
756      224,225,226,227,228,229,230,231,
757      232,233,234,235,236,237,238,239,
758      240,241,242,243,244,245,246,247,
759      248,249,250,251,252,253,254,255,
760    
761    /* This table is a case flipping table. */
762    
763        0,  1,  2,  3,  4,  5,  6,  7,
764        8,  9, 10, 11, 12, 13, 14, 15,
765       16, 17, 18, 19, 20, 21, 22, 23,
766       24, 25, 26, 27, 28, 29, 30, 31,
767       32, 33, 34, 35, 36, 37, 38, 39,
768       40, 41, 42, 43, 44, 45, 46, 47,
769       48, 49, 50, 51, 52, 53, 54, 55,
770       56, 57, 58, 59, 60, 61, 62, 63,
771       64, 97, 98, 99,100,101,102,103,
772      104,105,106,107,108,109,110,111,
773      112,113,114,115,116,117,118,119,
774      120,121,122, 91, 92, 93, 94, 95,
775       96, 65, 66, 67, 68, 69, 70, 71,
776       72, 73, 74, 75, 76, 77, 78, 79,
777       80, 81, 82, 83, 84, 85, 86, 87,
778       88, 89, 90,123,124,125,126,127,
779      128,129,130,131,132,133,134,135,
780      136,137,138,139,140,141,142,143,
781      144,145,146,147,148,149,150,151,
782      152,153,154,155,156,157,158,159,
783      160,161,162,163,164,165,166,167,
784      168,169,170,171,172,173,174,175,
785      176,177,178,179,180,181,182,183,
786      184,185,186,187,188,189,190,191,
787      192,193,194,195,196,197,198,199,
788      200,201,202,203,204,205,206,207,
789      208,209,210,211,212,213,214,215,
790      216,217,218,219,220,221,222,223,
791      224,225,226,227,228,229,230,231,
792      232,233,234,235,236,237,238,239,
793      240,241,242,243,244,245,246,247,
794      248,249,250,251,252,253,254,255,
795    
796    /* This table contains bit maps for various character classes. Each map is 32
797    bytes long and the bits run from the least significant end of each byte. The
798    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
799    graph, print, punct, and cntrl. Other classes are built from combinations. */
800    
801      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
802      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805    
806      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
807      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
808      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810    
811      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
812      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815    
816      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
817      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
818      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820    
821      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
822      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
823      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
824      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825    
826      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
827      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
828      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
829      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830    
831      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
832      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
833      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
834      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835    
836      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
837      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
838      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840    
841      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
842      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845    
846      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
847      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850    
851    /* This table identifies various classes of character by individual bits:
852      0x01   white space character
853      0x02   letter
854      0x04   decimal digit
855      0x08   hexadecimal digit
856      0x10   alphanumeric or '_'
857      0x80   regular expression metacharacter or binary zero
858    */
859    
860      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
861      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
862      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
863      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
864      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
865      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
866      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
867      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
868      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
869      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
870      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
871      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
872      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
873      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
874      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
875      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
876      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
877      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
878      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
879      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
880      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
881      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
882      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
883      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
884      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
885      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
886      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
887      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
888      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
889      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
890      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
891      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
892    
893    /* This is a set of tables that came orginally from a Windows user. It seems to
894    be at least an approximation of ISO 8859. In particular, there are characters
895    greater than 128 that are marked as spaces, letters, etc. */
896    
897    static const pcre_uint8 tables1[] = {
898    0,1,2,3,4,5,6,7,
899    8,9,10,11,12,13,14,15,
900    16,17,18,19,20,21,22,23,
901    24,25,26,27,28,29,30,31,
902    32,33,34,35,36,37,38,39,
903    40,41,42,43,44,45,46,47,
904    48,49,50,51,52,53,54,55,
905    56,57,58,59,60,61,62,63,
906    64,97,98,99,100,101,102,103,
907    104,105,106,107,108,109,110,111,
908    112,113,114,115,116,117,118,119,
909    120,121,122,91,92,93,94,95,
910    96,97,98,99,100,101,102,103,
911    104,105,106,107,108,109,110,111,
912    112,113,114,115,116,117,118,119,
913    120,121,122,123,124,125,126,127,
914    128,129,130,131,132,133,134,135,
915    136,137,138,139,140,141,142,143,
916    144,145,146,147,148,149,150,151,
917    152,153,154,155,156,157,158,159,
918    160,161,162,163,164,165,166,167,
919    168,169,170,171,172,173,174,175,
920    176,177,178,179,180,181,182,183,
921    184,185,186,187,188,189,190,191,
922    224,225,226,227,228,229,230,231,
923    232,233,234,235,236,237,238,239,
924    240,241,242,243,244,245,246,215,
925    248,249,250,251,252,253,254,223,
926    224,225,226,227,228,229,230,231,
927    232,233,234,235,236,237,238,239,
928    240,241,242,243,244,245,246,247,
929    248,249,250,251,252,253,254,255,
930    0,1,2,3,4,5,6,7,
931    8,9,10,11,12,13,14,15,
932    16,17,18,19,20,21,22,23,
933    24,25,26,27,28,29,30,31,
934    32,33,34,35,36,37,38,39,
935    40,41,42,43,44,45,46,47,
936    48,49,50,51,52,53,54,55,
937    56,57,58,59,60,61,62,63,
938    64,97,98,99,100,101,102,103,
939    104,105,106,107,108,109,110,111,
940    112,113,114,115,116,117,118,119,
941    120,121,122,91,92,93,94,95,
942    96,65,66,67,68,69,70,71,
943    72,73,74,75,76,77,78,79,
944    80,81,82,83,84,85,86,87,
945    88,89,90,123,124,125,126,127,
946    128,129,130,131,132,133,134,135,
947    136,137,138,139,140,141,142,143,
948    144,145,146,147,148,149,150,151,
949    152,153,154,155,156,157,158,159,
950    160,161,162,163,164,165,166,167,
951    168,169,170,171,172,173,174,175,
952    176,177,178,179,180,181,182,183,
953    184,185,186,187,188,189,190,191,
954    224,225,226,227,228,229,230,231,
955    232,233,234,235,236,237,238,239,
956    240,241,242,243,244,245,246,215,
957    248,249,250,251,252,253,254,223,
958    192,193,194,195,196,197,198,199,
959    200,201,202,203,204,205,206,207,
960    208,209,210,211,212,213,214,247,
961    216,217,218,219,220,221,222,255,
962    0,62,0,0,1,0,0,0,
963    0,0,0,0,0,0,0,0,
964    32,0,0,0,1,0,0,0,
965    0,0,0,0,0,0,0,0,
966    0,0,0,0,0,0,255,3,
967    126,0,0,0,126,0,0,0,
968    0,0,0,0,0,0,0,0,
969    0,0,0,0,0,0,0,0,
970    0,0,0,0,0,0,255,3,
971    0,0,0,0,0,0,0,0,
972    0,0,0,0,0,0,12,2,
973    0,0,0,0,0,0,0,0,
974    0,0,0,0,0,0,0,0,
975    254,255,255,7,0,0,0,0,
976    0,0,0,0,0,0,0,0,
977    255,255,127,127,0,0,0,0,
978    0,0,0,0,0,0,0,0,
979    0,0,0,0,254,255,255,7,
980    0,0,0,0,0,4,32,4,
981    0,0,0,128,255,255,127,255,
982    0,0,0,0,0,0,255,3,
983    254,255,255,135,254,255,255,7,
984    0,0,0,0,0,4,44,6,
985    255,255,127,255,255,255,127,255,
986    0,0,0,0,254,255,255,255,
987    255,255,255,255,255,255,255,127,
988    0,0,0,0,254,255,255,255,
989    255,255,255,255,255,255,255,255,
990    0,2,0,0,255,255,255,255,
991    255,255,255,255,255,255,255,127,
992    0,0,0,0,255,255,255,255,
993    255,255,255,255,255,255,255,255,
994    0,0,0,0,254,255,0,252,
995    1,0,0,248,1,0,0,120,
996    0,0,0,0,254,255,255,255,
997    0,0,128,0,0,0,128,0,
998    255,255,255,255,0,0,0,0,
999    0,0,0,0,0,0,0,128,
1000    255,255,255,255,0,0,0,0,
1001    0,0,0,0,0,0,0,0,
1002    128,0,0,0,0,0,0,0,
1003    0,1,1,0,1,1,0,0,
1004    0,0,0,0,0,0,0,0,
1005    0,0,0,0,0,0,0,0,
1006    1,0,0,0,128,0,0,0,
1007    128,128,128,128,0,0,128,0,
1008    28,28,28,28,28,28,28,28,
1009    28,28,0,0,0,0,0,128,
1010    0,26,26,26,26,26,26,18,
1011    18,18,18,18,18,18,18,18,
1012    18,18,18,18,18,18,18,18,
1013    18,18,18,128,128,0,128,16,
1014    0,26,26,26,26,26,26,18,
1015    18,18,18,18,18,18,18,18,
1016    18,18,18,18,18,18,18,18,
1017    18,18,18,128,128,0,0,0,
1018    0,0,0,0,0,1,0,0,
1019    0,0,0,0,0,0,0,0,
1020    0,0,0,0,0,0,0,0,
1021    0,0,0,0,0,0,0,0,
1022    1,0,0,0,0,0,0,0,
1023    0,0,18,0,0,0,0,0,
1024    0,0,20,20,0,18,0,0,
1025    0,20,18,0,0,0,0,0,
1026    18,18,18,18,18,18,18,18,
1027    18,18,18,18,18,18,18,18,
1028    18,18,18,18,18,18,18,0,
1029    18,18,18,18,18,18,18,18,
1030    18,18,18,18,18,18,18,18,
1031    18,18,18,18,18,18,18,18,
1032    18,18,18,18,18,18,18,0,
1033    18,18,18,18,18,18,18,18
1034    };
1035    
1036    
1037    
1038    
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* A few old systems (SunOS4 is one) have no strerror() in their libraries.
They do expose sys_nerr and sys_errlist[], from which this replacement is
built.

Argument:   n   the error number
Returns:        the matching sys_errlist entry, or a fixed "unknown" text when
                n is negative or not less than sys_nerr
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1058    
1059    
1060    /*************************************************
1061    *         JIT memory callback                    *
1062    *************************************************/
1063    
1064    static pcre_jit_stack* jit_callback(void *arg)
1065    {
1066    return (pcre_jit_stack *)arg;
1067    }
1068    
1069    
1070    #if !defined NOUTF || defined SUPPORT_PCRE16
1071    /*************************************************
1072    *            Convert UTF-8 string to value       *
1073    *************************************************/
1074    
1075    /* This function takes one or more bytes that represents a UTF-8 character,
1076    and returns the value of the character.
1077    
1078    Argument:
1079      utf8bytes   a pointer to the byte vector
1080      vptr        a pointer to an int to receive the value
1081    
1082    Returns:      >  0 => the number of bytes consumed
1083                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1084    */
1085    
1086    static int
1087    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1088    {
1089    int c = *utf8bytes++;
1090    int d = c;
1091    int i, j, s;
1092    
1093    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1094      {
1095      if ((d & 0x80) == 0) break;
1096      d <<= 1;
1097      }
1098    
1099    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1100    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1101    
1102    /* i now has a value in the range 1-5 */
1103    
1104    s = 6*i;
1105    d = (c & utf8_table3[i]) << s;
1106    
1107    for (j = 0; j < i; j++)
1108      {
1109      c = *utf8bytes++;
1110      if ((c & 0xc0) != 0x80) return -(j+1);
1111      s -= 6;
1112      d |= (c & 0x3f) << s;
1113      }
1114    
1115    /* Check that encoding was the correct unique one */
1116    
1117    for (j = 0; j < utf8_table1_size; j++)
1118      if (d <= utf8_table1[j]) break;
1119    if (j != i) return -(i+1);
1120    
1121    /* Valid value */
1122    
1123    *vptr = d;
1124    return i+1;
1125    }
1126    #endif /* NOUTF || SUPPORT_PCRE16 */
1127    
1128    
1129    
1130    #if !defined NOUTF || defined SUPPORT_PCRE16
1131    /*************************************************
1132    *       Convert character value to UTF-8         *
1133    *************************************************/
1134    
1135    /* This function takes an integer value in the range 0 - 0x7fffffff
1136    and encodes it as a UTF-8 character in 0 to 6 bytes.
1137    
1138    Arguments:
1139      cvalue     the character value
1140      utf8bytes  pointer to buffer for result - at least 6 bytes long
1141    
1142    Returns:     number of characters placed in the buffer
1143    */
1144    
1145    static int
1146    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1147    {
1148    register int i, j;
1149    for (i = 0; i < utf8_table1_size; i++)
1150      if (cvalue <= utf8_table1[i]) break;
1151    utf8bytes += i;
1152    for (j = i; j > 0; j--)
1153     {
1154     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1155     cvalue >>= 6;
1156     }
1157    *utf8bytes = utf8_table2[i] | cvalue;
1158    return i + 1;
1159    }
1160    #endif
1161    
1162    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is replaced by a larger block
whenever it holds fewer than 2*len + 2 bytes, that is, room for len 16-bit
items plus a terminating zero. The conversion never produces more items than
there are input bytes: a plain byte or a UTF-8 character of up to three bytes
yields one item, and a four-byte UTF-8 character yields two.

Surrogate values are deliberately not rejected here, so that invalid UTF-16
strings can be constructed in order to test that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  int c = 0;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);
    if (chlen <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += chlen;
    len -= chlen;
    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is allowed only in UTF mode. */

      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte simply widens to one 16-bit item. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1241    
1242    
1243    /*************************************************
1244    *        Read or extend an input line            *
1245    *************************************************/
1246    
1247    /* Input lines are read into buffer, but both patterns and data lines can be
1248    continued over multiple input lines. In addition, if the buffer fills up, we
1249    want to automatically expand it so as to be able to handle extremely large
1250    lines that are needed for certain stress tests. When the input buffer is
1251    expanded, the other two buffers must also be expanded likewise, and the
1252    contents of pbuffer, which are a copy of the input for callouts, must be
1253    preserved (for when expansion happens for a data line). This is not the most
1254    optimal way of handling this, but hey, this is just a test program!
1255    
1256    Arguments:
1257      f            the file to read
1258      start        where in buffer to start (this *must* be within buffer)
1259      prompt       for stdin or readline()
1260    
1261    Returns:       pointer to the start of new data
1262                   could be a copy of start, or could be moved
1263                   NULL if no data read and EOF reached
1264    */
1265    
1266    static pcre_uint8 *
1267    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1268    {
1269    pcre_uint8 *here = start;
1270    
1271    for (;;)
1272      {
1273      size_t rlen = (size_t)(buffer_size - (here - buffer));
1274    
1275      if (rlen > 1000)
1276        {
1277        int dlen;
1278    
1279        /* If libreadline support is required, use readline() to read a line if the
1280        input is a terminal. Note that readline() removes the trailing newline, so
1281        we must put it back again, to be compatible with fgets(). */
1282    
1283    #ifdef SUPPORT_LIBREADLINE
1284        if (isatty(fileno(f)))
1285          {
1286          size_t len;
1287          char *s = readline(prompt);
1288          if (s == NULL) return (here == start)? NULL : start;
1289          len = strlen(s);
1290          if (len > 0) add_history(s);
1291          if (len > rlen - 1) len = rlen - 1;
1292          memcpy(here, s, len);
1293          here[len] = '\n';
1294          here[len+1] = 0;
1295          free(s);
1296          }
1297        else
1298    #endif
1299    
1300        /* Read the next line by normal means, prompting if the file is stdin. */
1301    
1302          {
1303          if (f == stdin) printf("%s", prompt);
1304          if (fgets((char *)here, rlen,  f) == NULL)
1305            return (here == start)? NULL : start;
1306          }
1307    
1308        dlen = (int)strlen((char *)here);
1309        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1310        here += dlen;
1311        }
1312    
1313      else
1314        {
1315        int new_buffer_size = 2*buffer_size;
1316        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1317        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1318        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319    
1320        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1321          {
1322          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1323          exit(1);
1324          }
1325    
1326        memcpy(new_buffer, buffer, buffer_size);
1327        memcpy(new_pbuffer, pbuffer, buffer_size);
1328    
1329        buffer_size = new_buffer_size;
1330    
1331        start = new_buffer + (start - buffer);
1332        here = new_buffer + (here - buffer);
1333    
1334        free(buffer);
1335        free(dbuffer);
1336        free(pbuffer);
1337    
1338        buffer = new_buffer;
1339        dbuffer = new_dbuffer;
1340        pbuffer = new_pbuffer;
1341        }
1342      }
1343    
1344    return NULL;  /* Control never gets here */
1345    }
1346    
1347    
1348    
1349    /*************************************************
1350    *          Read number from string               *
1351    *************************************************/
1352    
1353    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1354    around with conditional compilation, just do the job by hand. It is only used
1355    for unpicking arguments, so just keep it simple.
1356    
1357    Arguments:
1358      str           string to be converted
1359      endptr        where to put the end pointer
1360    
1361    Returns:        the unsigned long
1362    */
1363    
1364    static int
1365    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1366    {
1367    int result = 0;
1368    while(*str != 0 && isspace(*str)) str++;
1369    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1370    *endptr = str;
1371    return(result);
1372    }
1373    
1374    
1375    
1376    /*************************************************
1377    *             Print one character                *
1378    *************************************************/
1379    
1380    /* Print a single character either literally, or as a hex escape. */
1381    
1382    static int pchar(int c, FILE *f)
1383    {
1384    if (PRINTOK(c))
1385      {
1386      if (f != NULL) fprintf(f, "%c", c);
1387      return 1;
1388      }
1389    
1390    if (c < 0x100)
1391      {
1392      if (use_utf)
1393        {
1394        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1395        return 6;
1396        }
1397      else
1398        {
1399        if (f != NULL) fprintf(f, "\\x%02x", c);
1400        return 4;
1401        }
1402      }
1403    
1404    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1405    return (c <= 0x000000ff)? 6 :
1406           (c <= 0x00000fff)? 7 :
1407           (c <= 0x0000ffff)? 8 :
1408           (c <= 0x000fffff)? 9 : 10;
1409    }
1410    
1411    
1412    
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Must handle UTF-8 strings in utf8 mode. In that mode, a byte sequence that
does not decode, or that would extend beyond the given length, is output one
byte at a time instead.

Arguments:
  p         points to the string
  length    number of bytes to print; negative means up to the first zero byte
  f         the output file, or NULL just to count without printing

Returns:    the number of characters printed (or that would be printed)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int c = 0;
int yield = 0;

if (length < 0) length = strlen((char *)p);

/* Each pass accounts for one byte via the loop step; a multi-byte character
subtracts its remaining bytes inside the body. */

for (; length > 0; length--)
  {
#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &c);
    if (rc > 0 && rc <= length)       /* Mustn't run over the end */
      {
      length -= rc - 1;
      p += rc;
      yield += pchar(c, f);
      continue;
      }
    }
#endif
  c = *p++;
  yield += pchar(c, f);
  }

return yield;
}
#endif
1451    
1452    
1453    
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Argument:   p   points to a zero-terminated string of 16-bit items
Returns:           the number of items before the terminating zero
*/

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1466    
1467    
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode. In that mode a high surrogate
(0xD800-0xDBFF) that is immediately followed, within the given length, by a low
surrogate (0xDC00-0xDFFF) is combined with it into a single character. An
unpaired surrogate is output as it stands.

Arguments:
  p         points to the string
  length    number of 16-bit items to print; negative means up to the first
              zero item
  f         the output file, or NULL just to count without printing

Returns:    the number of characters printed (or that would be printed)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range includes 0xDFFF itself. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1504    
1505    
1506    
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy a run of alphanumeric characters into the name list at *pp, and write
two zero bytes after it. *pp is then moved on to the second of those zeros, so
that the next name starts there. If the compiled pattern has no group of that
name, a message is written to outfile; the name is kept in the list regardless.
There is no check on the space available at *pp.

Arguments:
  p         points to the name in the input text
  pp        points to the pointer to the next free slot in the name list
  re        the compiled pattern

Returns:    pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *dest = name;

for (; isalnum(*p); p++) *dest++ = *p;
dest[0] = 0;
dest[1] = 0;
dest++;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = dest;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1530    
1531    
1532    
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* Note that the text being read is 8-bit; each byte of the name is widened to
one 16-bit item as it is copied. Two zero items are written after the name, and
*pp is moved on to the second of them, so that the next name starts there. If
the compiled pattern has no group of that name, a message is written to
outfile; the name is kept in the list regardless. There is no check on the
space available at *pp.

Arguments:
  p         points to the name in the (8-bit) input text
  pp        points to the pointer to the next free slot in the name list
  re        the compiled pattern

Returns:    pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *dest = name;

for (; isalnum(*p); p++) *dest++ = *p;
dest[0] = 0;
dest[1] = 0;
dest++;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = dest;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1557    
1558    
1559    
1560    /*************************************************
1561    *              Callout function                  *
1562    *************************************************/
1563    
1564    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1565    the match. Yield zero unless more callouts than the fail count, or the callout
1566    data is not zero. */
1567    
1568    static int callout(pcre_callout_block *cb)
1569    {
1570    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1571    int i, pre_start, post_start, subject_length;
1572    
1573    if (callout_extra)
1574      {
1575      fprintf(f, "Callout %d: last capture = %d\n",
1576        cb->callout_number, cb->capture_last);
1577    
1578      for (i = 0; i < cb->capture_top * 2; i += 2)
1579        {
1580        if (cb->offset_vector[i] < 0)
1581          fprintf(f, "%2d: <unset>\n", i/2);
1582        else
1583          {
1584          fprintf(f, "%2d: ", i/2);
1585          PCHARSV(cb->subject, cb->offset_vector[i],
1586            cb->offset_vector[i+1] - cb->offset_vector[i], f);
1587          fprintf(f, "\n");
1588          }
1589        }
1590      }
1591    
1592    /* Re-print the subject in canonical form, the first time or if giving full
1593    datails. On subsequent calls in the same match, we use pchars just to find the
1594    printed lengths of the substrings. */
1595    
1596    if (f != NULL) fprintf(f, "--->");
1597    
1598    PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1599    PCHARS(post_start, cb->subject, cb->start_match,
1600      cb->current_position - cb->start_match, f);
1601    
1602    PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1603    
1604    PCHARSV(cb->subject, cb->current_position,
1605      cb->subject_length - cb->current_position, f);
1606    
1607    if (f != NULL) fprintf(f, "\n");
1608    
1609    /* Always print appropriate indicators, with callout number if not already
1610    shown. For automatic callouts, show the pattern offset. */
1611    
1612    if (cb->callout_number == 255)
1613      {
1614      fprintf(outfile, "%+3d ", cb->pattern_position);
1615      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1616      }
1617    else
1618      {
1619      if (callout_extra) fprintf(outfile, "    ");
1620        else fprintf(outfile, "%3d ", cb->callout_number);
1621      }
1622    
1623    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1624    fprintf(outfile, "^");
1625    
1626    if (post_start > 0)
1627      {
1628      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1629      fprintf(outfile, "^");
1630      }
1631    
1632    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1633      fprintf(outfile, " ");
1634    
1635    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1636      pbuffer + cb->pattern_position);
1637    
1638    fprintf(outfile, "\n");
1639    first_callout = 0;
1640    
1641    if (cb->mark != last_callout_mark)
1642      {
1643      if (cb->mark == NULL)
1644        fprintf(outfile, "Latest Mark: <unset>\n");
1645      else
1646        {
1647        fprintf(outfile, "Latest Mark: ");
1648        PCHARSV(cb->mark, 0, -1, outfile);
1649        putc('\n', outfile);
1650        }
1651      last_callout_mark = cb->mark;
1652      }
1653    
1654    if (cb->callout_data != NULL)
1655      {
1656      int callout_data = *((int *)(cb->callout_data));
1657      if (callout_data != 0)
1658        {
1659        fprintf(outfile, "Callout data = %d\n", callout_data);
1660        return callout_data;
1661        }
1662      }
1663    
1664    return (cb->callout_number != callout_fail_id)? 0 :
1665           (++callout_count >= callout_fail_count)? 1 : 0;
1666    }
1667    
1668    
1669    /*************************************************
1670    *            Local malloc functions              *
1671    *************************************************/
1672    
1673    /* Alternative malloc function, to test functionality and save the size of a
1674    compiled re, which is the first store request that pcre_compile() makes. The
1675    show_malloc variable is set only during matching. */
1676    
1677    static void *new_malloc(size_t size)
1678    {
1679    void *block = malloc(size);
1680    gotten_store = size;
1681    if (first_gotten_store == 0) first_gotten_store = size;
1682    if (show_malloc)
1683      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1684    return block;
1685    }
1686    
1687    static void new_free(void *block)
1688  {  {
1689  unsigned char *code = ((real_pcre *)re)->code;  if (show_malloc)
1690      fprintf(outfile, "free             %p\n", block);
1691    free(block);
1692    }
1693    
1694    /* For recursion malloc/free, to test stacking calls */
1695    
1696  fprintf(outfile, "------------------------------------------------------------------\n");  static void *stack_malloc(size_t size)
1697    {
1698    void *block = malloc(size);
1699    if (show_malloc)
1700      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1701    return block;
1702    }
1703    
1704    static void stack_free(void *block)
1705    {
1706    if (show_malloc)
1707      fprintf(outfile, "stack_free       %p\n", block);
1708    free(block);
1709    }
1710    
1711  for(;;)  
1712    /*************************************************
1713    *          Call pcre_fullinfo()                  *
1714    *************************************************/
1715    
1716    /* Get one piece of information from the pcre_fullinfo() function. When only
1717    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1718    value, but the code is defensive.
1719    
1720    Arguments:
1721      re        compiled regex
1722      study     study data
1723      option    PCRE_INFO_xxx option
1724      ptr       where to put the data
1725    
1726    Returns:    0 when OK, < 0 on error
1727    */
1728    
1729    static int
1730    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1731    {
1732    int rc;
1733    
1734    if (use_pcre16)
1735    #ifdef SUPPORT_PCRE16
1736      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1737    #else
1738      rc = PCRE_ERROR_BADMODE;
1739    #endif
1740    else
1741    #ifdef SUPPORT_PCRE8
1742      rc = pcre_fullinfo(re, study, option, ptr);
1743    #else
1744      rc = PCRE_ERROR_BADMODE;
1745    #endif
1746    
1747    if (rc < 0)
1748      {
1749      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1750        use_pcre16? "16" : "", option);
1751      if (rc == PCRE_ERROR_BADMODE)
1752        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1753          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1754      }
1755    
1756    return rc;
1757    }
1758    
1759    
1760    
1761    /*************************************************
1762    *             Swap byte functions                *
1763    *************************************************/
1764    
1765    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1766    value, respectively.
1767    
1768    Arguments:
1769      value        any number
1770    
1771    Returns:       the byte swapped value
1772    */
1773    
1774    static pcre_uint32
1775    swap_uint32(pcre_uint32 value)
1776    {
1777    return ((value & 0x000000ff) << 24) |
1778           ((value & 0x0000ff00) <<  8) |
1779           ((value & 0x00ff0000) >>  8) |
1780           (value >> 24);
1781    }
1782    
1783    static pcre_uint16
1784    swap_uint16(pcre_uint16 value)
1785    {
1786    return (value >> 8) | (value << 8);
1787    }
1788    
1789    
1790    
1791    /*************************************************
1792    *        Flip bytes in a compiled pattern        *
1793    *************************************************/
1794    
1795    /* This function is called if the 'F' option was present on a pattern that is
1796    to be written to a file. We flip the bytes of all the integer fields in the
1797    regex data block and the study block. In 16-bit mode this also flips relevant
1798    bytes in the pattern itself. This is to make it possible to test PCRE's
1799    ability to reload byte-flipped patterns, e.g. those compiled on a different
1800    architecture. */
1801    
1802    static void
1803    regexflip(pcre *ere, pcre_extra *extra)
1804    {
1805    REAL_PCRE *re = (REAL_PCRE *)ere;
1806    #ifdef SUPPORT_PCRE16
1807    int op;
1808    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1809    int length = re->name_count * re->name_entry_size;
1810    #ifdef SUPPORT_UTF
1811    BOOL utf = (re->options & PCRE_UTF16) != 0;
1812    BOOL utf16_char = FALSE;
1813    #endif /* SUPPORT_UTF */
1814    #endif /* SUPPORT_PCRE16 */
1815    
1816    /* Always flip the bytes in the main data block and study blocks. */
1817    
1818    re->magic_number = REVERSED_MAGIC_NUMBER;
1819    re->size = swap_uint32(re->size);
1820    re->options = swap_uint32(re->options);
1821    re->flags = swap_uint16(re->flags);
1822    re->top_bracket = swap_uint16(re->top_bracket);
1823    re->top_backref = swap_uint16(re->top_backref);
1824    re->first_char = swap_uint16(re->first_char);
1825    re->req_char = swap_uint16(re->req_char);
1826    re->name_table_offset = swap_uint16(re->name_table_offset);
1827    re->name_entry_size = swap_uint16(re->name_entry_size);
1828    re->name_count = swap_uint16(re->name_count);
1829    
1830    if (extra != NULL)
1831    {    {
1832    int c;    pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1833    int charlength;    rsd->size = swap_uint32(rsd->size);
1834      rsd->flags = swap_uint32(rsd->flags);
1835      rsd->minlength = swap_uint32(rsd->minlength);
1836      }
1837    
1838    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1839    in the name table, if present, and then in the pattern itself. */
1840    
1841    if (*code >= OP_BRA)  #ifdef SUPPORT_PCRE16
1842    if (!use_pcre16) return;
1843    
1844    while(TRUE)
1845      {
1846      /* Swap previous characters. */
1847      while (length-- > 0)
1848      {      {
1849      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      *ptr = swap_uint16(*ptr);
1850      code += 2;      ptr++;
1851      }      }
1852    #ifdef SUPPORT_UTF
1853      if (utf16_char)
1854        {
1855        if ((ptr[-1] & 0xfc00) == 0xd800)
1856          {
1857          /* We know that there is only one extra character in UTF-16. */
1858          *ptr = swap_uint16(*ptr);
1859          ptr++;
1860          }
1861        }
1862      utf16_char = FALSE;
1863    #endif /* SUPPORT_UTF */
1864    
1865      /* Get next opcode. */
1866    
1867      length = 0;
1868      op = *ptr;
1869      *ptr++ = swap_uint16(op);
1870    
1871    else switch(*code)    switch (op)
1872      {      {
1873      case OP_END:      case OP_END:
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
1874      return;      return;
1875    
1876      case OP_OPT:  #ifdef SUPPORT_UTF
1877      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);      case OP_CHAR:
1878      code++;      case OP_CHARI:
1879      break;      case OP_NOT:
1880        case OP_NOTI:
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
1881      case OP_STAR:      case OP_STAR:
1882      case OP_MINSTAR:      case OP_MINSTAR:
1883      case OP_PLUS:      case OP_PLUS:
1884      case OP_MINPLUS:      case OP_MINPLUS:
1885      case OP_QUERY:      case OP_QUERY:
1886      case OP_MINQUERY:      case OP_MINQUERY:
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
1887      case OP_UPTO:      case OP_UPTO:
1888      case OP_MINUPTO:      case OP_MINUPTO:
1889      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);      case OP_EXACT:
1890        else fprintf(outfile, "    \\x%02x{", c);      case OP_POSSTAR:
1891      if (*code != OP_EXACT) fprintf(outfile, ",");      case OP_POSPLUS:
1892      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_POSQUERY:
1893      if (*code == OP_MINUPTO) fprintf(outfile, "?");      case OP_POSUPTO:
1894      code += 3;      case OP_STARI:
1895      break;      case OP_MINSTARI:
1896        case OP_PLUSI:
1897      case OP_TYPEEXACT:      case OP_MINPLUSI:
1898      case OP_TYPEUPTO:      case OP_QUERYI:
1899      case OP_TYPEMINUPTO:      case OP_MINQUERYI:
1900      fprintf(outfile, "    %s{", OP_names[code[3]]);      case OP_UPTOI:
1901      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");      case OP_MINUPTOI:
1902      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_EXACTI:
1903      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");      case OP_POSSTARI:
1904      code += 3;      case OP_POSPLUSI:
1905      break;      case OP_POSQUERYI:
1906        case OP_POSUPTOI:
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
1907      case OP_NOTSTAR:      case OP_NOTSTAR:
1908      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
1909      case OP_NOTPLUS:      case OP_NOTPLUS:
1910      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
1911      case OP_NOTQUERY:      case OP_NOTQUERY:
1912      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
1913      case OP_NOTUPTO:      case OP_NOTUPTO:
1914      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
1915      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);      case OP_NOTEXACT:
1916        else fprintf(outfile, "    [^\\x%02x]{", c);      case OP_NOTPOSSTAR:
1917      if (*code != OP_NOTEXACT) fprintf(outfile, ",");      case OP_NOTPOSPLUS:
1918      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_NOTPOSQUERY:
1919      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");      case OP_NOTPOSUPTO:
1920      code += 3;      case OP_NOTSTARI:
1921      break;      case OP_NOTMINSTARI:
1922        case OP_NOTPLUSI:
1923        case OP_NOTMINPLUSI:
1924        case OP_NOTQUERYI:
1925        case OP_NOTMINQUERYI:
1926        case OP_NOTUPTOI:
1927        case OP_NOTMINUPTOI:
1928        case OP_NOTEXACTI:
1929        case OP_NOTPOSSTARI:
1930        case OP_NOTPOSPLUSI:
1931        case OP_NOTPOSQUERYI:
1932        case OP_NOTPOSUPTOI:
1933        if (utf) utf16_char = TRUE;
1934    #endif
1935        /* Fall through. */
1936    
1937      case OP_REF:      default:
1938      fprintf(outfile, "    \\%d", *(++code));      length = OP_lengths16[op] - 1;
1939      code++;      break;
     goto CLASS_REF_REPEAT;  
1940    
1941      case OP_CLASS:      case OP_CLASS:
1942        case OP_NCLASS:
1943        /* Skip the character bit map. */
1944        ptr += 32/sizeof(pcre_uint16);
1945        length = 0;
1946        break;
1947    
1948        case OP_XCLASS:
1949        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1950        if (LINK_SIZE > 1)
1951          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1952            - (1 + LINK_SIZE + 1));
1953        else
1954          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1955    
1956        /* Reverse the size of the XCLASS instance. */
1957        *ptr = swap_uint16(*ptr);
1958        ptr++;
1959        if (LINK_SIZE > 1)
1960        {        {
1961        int i, min, max;        *ptr = swap_uint16(*ptr);
1962        code++;        ptr++;
1963        fprintf(outfile, "    [");        }
1964    
1965        for (i = 0; i < 256; i++)      op = *ptr;
1966          {      *ptr = swap_uint16(op);
1967          if ((code[i/8] & (1 << (i&7))) != 0)      ptr++;
1968            {      if ((op & XCL_MAP) != 0)
1969            int j;        {
1970            for (j = i+1; j < 256; j++)        /* Skip the character bit map. */
1971              if ((code[j/8] & (1 << (j&7))) == 0) break;        ptr += 32/sizeof(pcre_uint16);
1972            if (i == '-' || i == ']') fprintf(outfile, "\\");        length -= 32/sizeof(pcre_uint16);
1973            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);        }
1974            if (--j > i)      break;
1975              {      }
1976              fprintf(outfile, "-");    }
1977              if (j == '-' || j == ']') fprintf(outfile, "\\");  /* Control should never reach here in 16 bit mode. */
1978              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  #endif /* SUPPORT_PCRE16 */
1979              }  }
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
1980    
       CLASS_REF_REPEAT:  
1981    
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
1982    
1983          case OP_CRRANGE:  /*************************************************
1984          case OP_CRMINRANGE:  *        Check match or recursion limit          *
1985          min = (code[1] << 8) + code[2];  *************************************************/
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
1986    
1987          default:  static int
1988          code--;  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1989          }    int start_offset, int options, int *use_offsets, int use_size_offsets,
1990        }    int flag, unsigned long int *limit, int errnumber, const char *msg)
1991      break;  {
1992    int count;
1993    int min = 0;
1994    int mid = 64;
1995    int max = -1;
1996    
1997      /* Anything else is just a one-node item */  extra->flags |= flag;
1998    
1999      default:  for (;;)
2000      fprintf(outfile, "    %s", OP_names[*code]);    {
2001      break;    *limit = mid;
2002    
2003      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2004        use_offsets, use_size_offsets);
2005    
2006      if (count == errnumber)
2007        {
2008        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2009        min = mid;
2010        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2011      }      }
2012    
2013    code++;    else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2014    fprintf(outfile, "\n");                           count == PCRE_ERROR_PARTIAL)
2015        {
2016        if (mid == min + 1)
2017          {
2018          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2019          break;
2020          }
2021        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2022        max = mid;
2023        mid = (min + mid)/2;
2024        }
2025      else break;    /* Some other error */
2026    }    }
2027    
2028    extra->flags &= ~flag;
2029    return count;
2030  }  }
2031    
2032    
2033    
2034  /* Character string printing function. */  /*************************************************
2035    *         Case-independent strncmp() function    *
2036    *************************************************/
2037    
2038    /*
2039    Arguments:
2040      s         first string
2041      t         second string
2042      n         number of characters to compare
2043    
2044    Returns:    < 0, = 0, or > 0, according to the comparison
2045    */
2046    
2047  static void pchars(unsigned char *p, int length)  static int
2048    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2049  {  {
2050  int c;  while (n--)
2051  while (length-- > 0)    {
2052    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    int c = tolower(*s++) - tolower(*t++);
2053      else fprintf(outfile, "\\x%02x", c);    if (c) return c;
2054      }
2055    return 0;
2056  }  }
2057    
2058    
2059    
2060  /* Alternative malloc function, to test functionality and show the size of the  /*************************************************
2061  compiled re. */  *         Check newline indicator                *
2062    *************************************************/
2063    
2064    /* This is used both at compile and run-time to check for <xxx> escapes. Print
2065    a message and return 0 if there is no match.
2066    
2067  static void *new_malloc(size_t size)  Arguments:
2068      p           points after the leading '<'
2069      f           file for error message
2070    
2071    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2072    */
2073    
2074    static int
2075    check_newline(pcre_uint8 *p, FILE *f)
2076    {
2077    if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2078    if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2079    if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2080    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2081    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2082    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2083    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2084    fprintf(f, "Unknown newline type at: <%s\n", p);
2085    return 0;
2086    }
2087    
2088    
2089    
2090    /*************************************************
2091    *             Usage function                     *
2092    *************************************************/
2093    
2094    static void
2095    usage(void)
2096  {  {
2097  if (log_store)  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2098    fprintf(outfile, "Memory allocation request: %d (code space %d)\n",  printf("Input and output default to stdin and stdout.\n");
2099      (int)size, (int)size - offsetof(real_pcre, code[0]));  #ifdef SUPPORT_LIBREADLINE
2100  return malloc(size);  printf("If input is a terminal, readline() is used to read from it.\n");
2101    #else
2102    printf("This version of pcretest is not linked with readline().\n");
2103    #endif
2104    printf("\nOptions:\n");
2105    #ifdef SUPPORT_PCRE16
2106    printf("  -16      use the 16-bit library\n");
2107    #endif
2108    printf("  -b       show compiled code\n");
2109    printf("  -C       show PCRE compile-time options and exit\n");
2110    printf("  -C arg   show a specific compile-time option\n");
2111    printf("           and exit with its value. The arg can be:\n");
2112    printf("     linksize     internal link size [2, 3, 4]\n");
2113    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2114    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2115    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2116    printf("     ucp          Unicode Properties supported [0, 1]\n");
2117    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2118    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2119    printf("  -d       debug: show compiled code and information (-b and -i)\n");
2120    #if !defined NODFA
2121    printf("  -dfa     force DFA matching for all subjects\n");
2122    #endif
2123    printf("  -help    show usage information\n");
2124    printf("  -i       show information about compiled patterns\n"
2125           "  -M       find MATCH_LIMIT minimum for each subject\n"
2126           "  -m       output memory used information\n"
2127           "  -o <n>   set size of offsets vector to <n>\n");
2128    #if !defined NOPOSIX
2129    printf("  -p       use POSIX interface\n");
2130    #endif
2131    printf("  -q       quiet: do not output PCRE version number at start\n");
2132    printf("  -S <n>   set stack size to <n> megabytes\n");
2133    printf("  -s       force each pattern to be studied at basic level\n"
2134           "  -s+      force each pattern to be studied, using JIT if available\n"
2135           "  -t       time compilation and execution\n");
2136    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2137    printf("  -tm      time execution (matching) only\n");
2138    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
2139  }  }
2140    
2141    
2142    
2143    /*************************************************
2144    *                Main Program                    *
2145    *************************************************/
2146    
2147  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
2148  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
2149  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 289  options, followed by a set of test data, Line 2151  options, followed by a set of test data,
2151  int main(int argc, char **argv)  int main(int argc, char **argv)
2152  {  {
2153  FILE *infile = stdin;  FILE *infile = stdin;
2154    const char *version;
2155  int options = 0;  int options = 0;
2156  int study_options = 0;  int study_options = 0;
2157    int default_find_match_limit = FALSE;
2158  int op = 1;  int op = 1;
2159  int timeit = 0;  int timeit = 0;
2160    int timeitm = 0;
2161  int showinfo = 0;  int showinfo = 0;
2162  int showstore = 0;  int showstore = 0;
2163    int force_study = -1;
2164    int force_study_options = 0;
2165    int quiet = 0;
2166    int size_offsets = 45;
2167    int size_offsets_max;
2168    int *offsets = NULL;
2169    #if !defined NOPOSIX
2170  int posix = 0;  int posix = 0;
2171    #endif
2172  int debug = 0;  int debug = 0;
2173  int done = 0;  int done = 0;
2174  unsigned char buffer[30000];  int all_use_dfa = 0;
2175  unsigned char dbuffer[1024];  int yield = 0;
2176    int stack_size;
2177    
2178    pcre_jit_stack *jit_stack = NULL;
2179    
2180    /* These vectors store, end-to-end, a list of zero-terminated captured
2181    substring names, each list itself being terminated by an empty name. Assume
2182    that 1024 is plenty long enough for the few names we'll be testing. It is
2183    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2184    for the actual memory, to ensure alignment. */
2185    
2186    pcre_uint16 copynames[1024];
2187    pcre_uint16 getnames[1024];
2188    
2189    #ifdef SUPPORT_PCRE16
2190    pcre_uint16 *cn16ptr;
2191    pcre_uint16 *gn16ptr;
2192    #endif
2193    
2194    #ifdef SUPPORT_PCRE8
2195    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2196    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2197    pcre_uint8 *cn8ptr;
2198    pcre_uint8 *gn8ptr;
2199    #endif
2200    
2201    /* Get buffers from malloc() so that valgrind will check their misuse when
2202    debugging. They grow automatically when very long lines are read. The 16-bit
2203    buffer (buffer16) is obtained only if needed. */
2204    
2205    buffer = (pcre_uint8 *)malloc(buffer_size);
2206    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2207    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2208    
2209  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2210    
2211  outfile = stdout;  outfile = stdout;
2212    
2213    /* The following  _setmode() stuff is some Windows magic that tells its runtime
2214    library to translate CRLF into a single LF character. At least, that's what
2215    I've been told: never having used Windows I take this all on trust. Originally
2216    it set 0x8000, but then I was advised that _O_BINARY was better. */
2217    
2218    #if defined(_WIN32) || defined(WIN32)
2219    _setmode( _fileno( stdout ), _O_BINARY );
2220    #endif
2221    
2222    /* Get the version number: both pcre_version() and pcre16_version() give the
2223    same answer. We just need to ensure that we call one that is available. */
2224    
2225    #ifdef SUPPORT_PCRE8
2226    version = pcre_version();
2227    #else
2228    version = pcre16_version();
2229    #endif
2230    
2231  /* Scan options */  /* Scan options */
2232    
2233  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2234    {    {
2235    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    pcre_uint8 *endptr;
2236      showstore = 1;  
2237    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    if (strcmp(argv[op], "-m") == 0) showstore = 1;
2238      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2239      else if (strcmp(argv[op], "-s+") == 0)
2240        {
2241        force_study = 1;
2242        force_study_options = PCRE_STUDY_JIT_COMPILE
2243                            | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
2244                            | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
2245        }
2246      else if (strcmp(argv[op], "-16") == 0)
2247        {
2248    #ifdef SUPPORT_PCRE16
2249        use_pcre16 = 1;
2250    #else
2251        printf("** This version of PCRE was built without 16-bit support\n");
2252        exit(1);
2253    #endif
2254        }
2255      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2256      else if (strcmp(argv[op], "-b") == 0) debug = 1;
2257    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2258    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2259      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2260    #if !defined NODFA
2261      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2262    #endif
2263      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2264          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2265            *endptr == 0))
2266        {
2267        op++;
2268        argc--;
2269        }
2270      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2271        {
2272        int both = argv[op][2] == 0;
2273        int temp;
2274        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2275                         *endptr == 0))
2276          {
2277          timeitm = temp;
2278          op++;
2279          argc--;
2280          }
2281        else timeitm = LOOPREPEAT;
2282        if (both) timeit = timeitm;
2283        }
2284      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2285          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2286            *endptr == 0))
2287        {
2288    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2289        printf("PCRE: -S not supported on this OS\n");
2290        exit(1);
2291    #else
2292        int rc;
2293        struct rlimit rlim;
2294        getrlimit(RLIMIT_STACK, &rlim);
2295        rlim.rlim_cur = stack_size * 1024 * 1024;
2296        rc = setrlimit(RLIMIT_STACK, &rlim);
2297        if (rc != 0)
2298          {
2299        printf("PCRE: setrlimit() failed with error %d\n", rc);
2300        exit(1);
2301          }
2302        op++;
2303        argc--;
2304    #endif
2305        }
2306    #if !defined NOPOSIX
2307    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
2308    #endif
2309      else if (strcmp(argv[op], "-C") == 0)
2310        {
2311        int rc;
2312        unsigned long int lrc;
2313    
2314        if (argc > 2)
2315          {
2316          if (strcmp(argv[op + 1], "linksize") == 0)
2317            {
2318            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2319            printf("%d\n", rc);
2320            yield = rc;
2321            goto EXIT;
2322            }
2323          if (strcmp(argv[op + 1], "pcre8") == 0)
2324            {
2325    #ifdef SUPPORT_PCRE8
2326            printf("1\n");
2327            yield = 1;
2328    #else
2329            printf("0\n");
2330            yield = 0;
2331    #endif
2332            goto EXIT;
2333            }
2334          if (strcmp(argv[op + 1], "pcre16") == 0)
2335            {
2336    #ifdef SUPPORT_PCRE16
2337            printf("1\n");
2338            yield = 1;
2339    #else
2340            printf("0\n");
2341            yield = 0;
2342    #endif
2343            goto EXIT;
2344            }
2345          if (strcmp(argv[op + 1], "utf") == 0)
2346            {
2347    #ifdef SUPPORT_PCRE8
2348            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2349            printf("%d\n", rc);
2350            yield = rc;
2351    #else
2352            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2353            printf("%d\n", rc);
2354            yield = rc;
2355    #endif
2356            goto EXIT;
2357            }
2358          if (strcmp(argv[op + 1], "ucp") == 0)
2359            {
2360            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2361            printf("%d\n", rc);
2362            yield = rc;
2363            goto EXIT;
2364            }
2365          if (strcmp(argv[op + 1], "jit") == 0)
2366            {
2367            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2368            printf("%d\n", rc);
2369            yield = rc;
2370            goto EXIT;
2371            }
2372          if (strcmp(argv[op + 1], "newline") == 0)
2373            {
2374            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2375            /* Note that these values are always the ASCII values, even
2376            in EBCDIC environments. CR is 13 and NL is 10. */
2377            printf("%s\n", (rc == 13)? "CR" :
2378              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2379              (rc == -2)? "ANYCRLF" :
2380              (rc == -1)? "ANY" : "???");
2381            goto EXIT;
2382            }
2383          printf("Unknown -C option: %s\n", argv[op + 1]);
2384          goto EXIT;
2385          }
2386    
2387        printf("PCRE version %s\n", version);
2388        printf("Compiled with\n");
2389    
2390    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2391    are set, either both UTFs are supported or both are not supported. */
2392    
2393    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2394        printf("  8-bit and 16-bit support\n");
2395        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2396        if (rc)
2397          printf("  UTF-8 and UTF-16 support\n");
2398        else
2399          printf("  No UTF-8 or UTF-16 support\n");
2400    #elif defined SUPPORT_PCRE8
2401        printf("  8-bit support only\n");
2402        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2403        printf("  %sUTF-8 support\n", rc? "" : "No ");
2404    #else
2405        printf("  16-bit support only\n");
2406        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2407        printf("  %sUTF-16 support\n", rc? "" : "No ");
2408    #endif
2409    
2410        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2411        printf("  %sUnicode properties support\n", rc? "" : "No ");
2412        (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2413        if (rc)
2414          {
2415          const char *arch;
2416          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2417          printf("  Just-in-time compiler support: %s\n", arch);
2418          }
2419        else
2420          printf("  No just-in-time compiler support\n");
2421        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2422        /* Note that these values are always the ASCII values, even
2423        in EBCDIC environments. CR is 13 and NL is 10. */
2424        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2425          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2426          (rc == -2)? "ANYCRLF" :
2427          (rc == -1)? "ANY" : "???");
2428        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2429        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2430                                         "all Unicode newlines");
2431        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2432        printf("  Internal link size = %d\n", rc);
2433        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2434        printf("  POSIX malloc threshold = %d\n", rc);
2435        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2436        printf("  Default match limit = %ld\n", lrc);
2437        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2438        printf("  Default recursion depth limit = %ld\n", lrc);
2439        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2440        printf("  Match recursion uses %s", rc? "stack" : "heap");
2441        if (showstore)
2442          {
2443          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2444          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2445          }
2446        printf("\n");
2447        goto EXIT;
2448        }
2449      else if (strcmp(argv[op], "-help") == 0 ||
2450               strcmp(argv[op], "--help") == 0)
2451        {
2452        usage();
2453        goto EXIT;
2454        }
2455    else    else
2456      {      {
2457      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
2458      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
2459      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
2460             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
2461      }      }
2462    op++;    op++;
2463    argc--;    argc--;
2464    }    }
2465    
2466    /* Get the store for the offsets vector, and remember what it was */
2467    
2468    size_offsets_max = size_offsets;
2469    offsets = (int *)malloc(size_offsets_max * sizeof(int));
2470    if (offsets == NULL)
2471      {
2472      printf("** Failed to get %d bytes of memory for offsets vector\n",
2473        (int)(size_offsets_max * sizeof(int)));
2474      yield = 1;
2475      goto EXIT;
2476      }
2477    
2478  /* Sort out the input and output files */  /* Sort out the input and output files */
2479    
2480  if (argc > 1)  if (argc > 1)
2481    {    {
2482    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
2483    if (infile == NULL)    if (infile == NULL)
2484      {      {
2485      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
2486      return 1;      yield = 1;
2487        goto EXIT;
2488      }      }
2489    }    }
2490    
2491  if (argc > 2)  if (argc > 2)
2492    {    {
2493    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
2494    if (outfile == NULL)    if (outfile == NULL)
2495      {      {
2496      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
2497      return 1;      yield = 1;
2498        goto EXIT;
2499      }      }
2500    }    }
2501    
2502  /* Set alternative malloc function */  /* Set alternative malloc function */
2503    
2504    #ifdef SUPPORT_PCRE8
2505  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2506    pcre_free = new_free;
2507    pcre_stack_malloc = stack_malloc;
2508    pcre_stack_free = stack_free;
2509    #endif
2510    
2511    #ifdef SUPPORT_PCRE16
2512    pcre16_malloc = new_malloc;
2513    pcre16_free = new_free;
2514    pcre16_stack_malloc = stack_malloc;
2515    pcre16_stack_free = stack_free;
2516    #endif
2517    
2518  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2519    
2520  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2521    
2522  /* Main loop */  /* Main loop */
2523    
# Line 366  while (!done) Line 2525  while (!done)
2525    {    {
2526    pcre *re = NULL;    pcre *re = NULL;
2527    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
2528    
2529    #if !defined NOPOSIX  /* There are still compilers that require no indent */
2530    regex_t preg;    regex_t preg;
2531      int do_posix = 0;
2532    #endif
2533    
2534    const char *error;    const char *error;
2535    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2536    unsigned const char *tables = NULL;    pcre_uint8 *p, *pp, *ppp;
2537      pcre_uint8 *to_file = NULL;
2538      const pcre_uint8 *tables = NULL;
2539      unsigned long int get_options;
2540      unsigned long int true_size, true_study_size = 0;
2541      size_t size, regex_gotten_store;
2542      int do_allcaps = 0;
2543      int do_mark = 0;
2544    int do_study = 0;    int do_study = 0;
2545      int no_force_study = 0;
2546    int do_debug = debug;    int do_debug = debug;
2547      int do_G = 0;
2548      int do_g = 0;
2549    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2550    int do_posix = 0;    int do_showrest = 0;
2551    int erroroffset, len, delimiter;    int do_showcaprest = 0;
2552      int do_flip = 0;
2553      int erroroffset, len, delimiter, poffset;
2554    
2555    if (infile == stdin) printf("  re> ");    use_utf = 0;
2556    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    debug_lengths = 1;
2557    
2558      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
2559    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2560      fflush(outfile);
2561    
2562    p = buffer;    p = buffer;
2563    while (isspace(*p)) p++;    while (isspace(*p)) p++;
2564    if (*p == 0) continue;    if (*p == 0) continue;
2565    
2566    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
2567    complete, read more. */  
2568      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2569        {
2570        pcre_uint32 magic;
2571        pcre_uint8 sbuf[8];
2572        FILE *f;
2573    
2574        p++;
2575        if (*p == '!')
2576          {
2577          do_debug = TRUE;
2578          do_showinfo = TRUE;
2579          p++;
2580          }
2581    
2582        pp = p + (int)strlen((char *)p);
2583        while (isspace(pp[-1])) pp--;
2584        *pp = 0;
2585    
2586        f = fopen((char *)p, "rb");
2587        if (f == NULL)
2588          {
2589          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2590          continue;
2591          }
2592    
2593        first_gotten_store = 0;
2594        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2595    
2596        true_size =
2597          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2598        true_study_size =
2599          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2600    
2601        re = (pcre *)new_malloc(true_size);
2602        regex_gotten_store = first_gotten_store;
2603    
2604        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2605    
2606        magic = ((REAL_PCRE *)re)->magic_number;
2607        if (magic != MAGIC_NUMBER)
2608          {
2609          if (swap_uint32(magic) == MAGIC_NUMBER)
2610            {
2611            do_flip = 1;
2612            }
2613          else
2614            {
2615            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2616            fclose(f);
2617            continue;
2618            }
2619          }
2620    
2621        /* We hide the byte-invert info for little and big endian tests. */
2622        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2623          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2624    
2625        /* Now see if there is any following study data. */
2626    
2627        if (true_study_size != 0)
2628          {
2629          pcre_study_data *psd;
2630    
2631          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2632          extra->flags = PCRE_EXTRA_STUDY_DATA;
2633    
2634          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2635          extra->study_data = psd;
2636    
2637          if (fread(psd, 1, true_study_size, f) != true_study_size)
2638            {
2639            FAIL_READ:
2640            fprintf(outfile, "Failed to read data from %s\n", p);
2641            if (extra != NULL)
2642              {
2643              PCRE_FREE_STUDY(extra);
2644              }
2645            if (re != NULL) new_free(re);
2646            fclose(f);
2647            continue;
2648            }
2649          fprintf(outfile, "Study data loaded from %s\n", p);
2650          do_study = 1;     /* To get the data output if requested */
2651          }
2652        else fprintf(outfile, "No study data\n");
2653    
2654        /* Flip the necessary bytes. */
2655        if (do_flip)
2656          {
2657          int rc;
2658          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2659          if (rc == PCRE_ERROR_BADMODE)
2660            {
2661            /* Simulate the result of the function call below. */
2662            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2663              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2664            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2665              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2666            continue;
2667            }
2668          }
2669    
2670        /* Need to know if UTF-8 for printing data strings. */
2671    
2672        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2673        use_utf = (get_options & PCRE_UTF8) != 0;
2674    
2675        fclose(f);
2676        goto SHOW_INFO;
2677        }
2678    
2679      /* In-line pattern (the usual case). Get the delimiter and seek the end of
2680      the pattern; if it isn't complete, read more. */
2681    
2682    delimiter = *p++;    delimiter = *p++;
2683    
2684    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2685      {      {
2686      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2687      goto SKIP_DATA;      goto SKIP_DATA;
2688      }      }
2689    
2690    pp = p;    pp = p;
2691      poffset = (int)(p - buffer);
2692    
2693    for(;;)    for(;;)
2694      {      {
# Line 406  while (!done) Line 2699  while (!done)
2699        pp++;        pp++;
2700        }        }
2701      if (*pp != 0) break;      if (*pp != 0) break;
2702        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
2703        {        {
2704        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2705        done = 1;        done = 1;
# Line 424  while (!done) Line 2708  while (!done)
2708      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2709      }      }
2710    
2711      /* The buffer may have moved while being extended; reset the start of data
2712      pointer to the correct relative point in the buffer. */
2713    
2714      p = buffer + poffset;
2715    
2716    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
2717    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
2718    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
2719    
2720    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
2721    
2722    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
2723      for callouts. */
2724    
2725    *pp++ = 0;    *pp++ = 0;
2726      strcpy((char *)pbuffer, (char *)p);
2727    
2728    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2729    
# Line 444  while (!done) Line 2735  while (!done)
2735      {      {
2736      switch (*pp++)      switch (*pp++)
2737        {        {
2738          case 'f': options |= PCRE_FIRSTLINE; break;
2739          case 'g': do_g = 1; break;
2740        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
2741        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
2742        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2743        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2744    
2745          case '+':
2746          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2747          break;
2748    
2749          case '=': do_allcaps = 1; break;
2750        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2751          case 'B': do_debug = 1; break;
2752          case 'C': options |= PCRE_AUTO_CALLOUT; break;
2753        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
2754        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2755          case 'F': do_flip = 1; break;
2756          case 'G': do_G = 1; break;
2757        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2758          case 'J': options |= PCRE_DUPNAMES; break;
2759          case 'K': do_mark = 1; break;
2760        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2761          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2762    
2763    #if !defined NOPOSIX
2764        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2765        case 'S': do_study = 1; break;  #endif
2766    
2767          case 'S':
2768          if (do_study == 0)
2769            {
2770            do_study = 1;
2771            if (*pp == '+')
2772              {
2773              study_options |= PCRE_STUDY_JIT_COMPILE
2774                            | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
2775                            | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
2776              pp++;
2777              }
2778            }
2779          else
2780            {
2781            do_study = 0;
2782            no_force_study = 1;
2783            }
2784          break;
2785    
2786        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2787          case 'W': options |= PCRE_UCP; break;
2788        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2789          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2790          case 'Z': debug_lengths = 0; break;
2791          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2792          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2793    
2794          case 'T':
2795          switch (*pp++)
2796            {
2797            case '0': tables = tables0; break;
2798            case '1': tables = tables1; break;
2799    
2800            case '\r':
2801            case '\n':
2802            case ' ':
2803            case 0:
2804            fprintf(outfile, "** Missing table number after /T\n");
2805            goto SKIP_DATA;
2806    
2807            default:
2808            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2809            goto SKIP_DATA;
2810            }
2811          break;
2812    
2813        case 'L':        case 'L':
2814        ppp = pp;        ppp = pp;
2815        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
2816          /* The 0 (NUL) test is just in case this is an unterminated line. */
2817          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2818        *ppp = 0;        *ppp = 0;
2819        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2820          {          {
2821          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2822          goto SKIP_DATA;          goto SKIP_DATA;
2823          }          }
2824        tables = pcre_maketables();        locale_set = 1;
2825          tables = PCRE_MAKETABLES;
2826        pp = ppp;        pp = ppp;
2827        break;        break;
2828    
2829        case '\n': case ' ': break;        case '>':
2830          to_file = pp;
2831          while (*pp != 0) pp++;
2832          while (isspace(pp[-1])) pp--;
2833          *pp = 0;
2834          break;
2835    
2836          case '<':
2837            {
2838            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2839              {
2840              options |= PCRE_JAVASCRIPT_COMPAT;
2841              pp += 3;
2842              }
2843            else
2844              {
2845              int x = check_newline(pp, outfile);
2846              if (x == 0) goto SKIP_DATA;
2847              options |= x;
2848              while (*pp++ != '>');
2849              }
2850            }
2851          break;
2852    
2853          case '\r':                      /* So that it works in Windows */
2854          case '\n':
2855          case ' ':
2856          break;
2857    
2858        default:        default:
2859        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2860        goto SKIP_DATA;        goto SKIP_DATA;
# Line 481  while (!done) Line 2863  while (!done)
2863    
2864    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2865    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2866    local character tables. */    local character tables. Neither does it have 16-bit support. */
2867    
2868    #if !defined NOPOSIX
2869    if (posix || do_posix)    if (posix || do_posix)
2870      {      {
2871      int rc;      int rc;
2872      int cflags = 0;      int cflags = 0;
2873    
2874      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2875      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2876        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2877        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2878        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2879        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2880        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2881    
2882        first_gotten_store = 0;
2883      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2884    
2885      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 496  while (!done) Line 2887  while (!done)
2887    
2888      if (rc != 0)      if (rc != 0)
2889        {        {
2890        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2891        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2892        goto SKIP_DATA;        goto SKIP_DATA;
2893        }        }
# Line 505  while (!done) Line 2896  while (!done)
2896    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
2897    
2898    else    else
2899    #endif  /* !defined NOPOSIX */
2900    
2901      {      {
2902      if (timeit)      /* In 16-bit mode, convert the input. */
2903    
2904    #ifdef SUPPORT_PCRE16
2905        if (use_pcre16)
2906          {
2907          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2908            {
2909            case -1:
2910            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2911              "converted to UTF-16\n");
2912            goto SKIP_DATA;
2913    
2914            case -2:
2915            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2916              "cannot be converted to UTF-16\n");
2917            goto SKIP_DATA;
2918    
2919            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2920            fprintf(outfile, "**Failed: character value greater than 0xffff "
2921              "cannot be converted to 16-bit in non-UTF mode\n");
2922            goto SKIP_DATA;
2923    
2924            default:
2925            break;
2926            }
2927          p = (pcre_uint8 *)buffer16;
2928          }
2929    #endif
2930    
2931        /* Compile many times when timing */
2932    
2933        if (timeit > 0)
2934        {        {
2935        register int i;        register int i;
2936        clock_t time_taken;        clock_t time_taken;
2937        clock_t start_time = clock();        clock_t start_time = clock();
2938        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
2939          {          {
2940          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2941          if (re != NULL) free(re);          if (re != NULL) free(re);
2942          }          }
2943        time_taken = clock() - start_time;        time_taken = clock() - start_time;
2944        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
2945          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
2946          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
2947        }        }
2948    
2949      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2950        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2951    
2952      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2953      if non-interactive. */      if non-interactive. */
# Line 535  while (!done) Line 2960  while (!done)
2960          {          {
2961          for (;;)          for (;;)
2962            {            {
2963            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2964              {              {
2965              done = 1;              done = 1;
2966              goto CONTINUE;              goto CONTINUE;
# Line 549  while (!done) Line 2974  while (!done)
2974        goto CONTINUE;        goto CONTINUE;
2975        }        }
2976    
2977      /* Compilation succeeded; print data if required */      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2978        within the regex; check for this so that we know how to process the data
2979        lines. */
2980    
2981      if (do_showinfo)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2982        {        goto SKIP_DATA;
2983        int first_char, count;      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2984    
2985        if (do_debug) print_internals(re, outfile);      /* Extract the size for possible writing before possibly flipping it,
2986        and remember the store that was got. */
2987    
2988        count = pcre_info(re, &options, &first_char);      true_size = ((REAL_PCRE *)re)->size;
2989        if (count < 0) fprintf(outfile,      regex_gotten_store = first_gotten_store;
2990          "Error %d while reading info\n", count);  
2991        else      /* Output code size information if requested */
2992    
2993        if (log_store)
2994          fprintf(outfile, "Memory allocation (code space): %d\n",
2995            (int)(first_gotten_store -
2996                  sizeof(REAL_PCRE) -
2997                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2998    
2999        /* If -s or /S was present, study the regex to generate additional info to
3000        help with the matching, unless the pattern has the SS option, which
3001        suppresses studying even when -s was given (used for a few test patterns
3002        where studying is never sensible). */
3003    
3004        if (do_study || (force_study >= 0 && !no_force_study))
3005          {
3006          if (timeit > 0)
3007          {          {
3008          fprintf(outfile, "Identifying subpattern count = %d\n", count);          register int i;
3009          if (options == 0) fprintf(outfile, "No options\n");          clock_t time_taken;
3010            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",          clock_t start_time = clock();
3011              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",          for (i = 0; i < timeit; i++)
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "",  
             ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
         if (first_char == -1)  
3012            {            {
3013            fprintf(outfile, "First char at start or follows \\n\n");            PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3014            }            }
3015          else if (first_char < 0)          time_taken = clock() - start_time;
3016            if (extra != NULL)
3017            {            {
3018            fprintf(outfile, "No first char\n");            PCRE_FREE_STUDY(extra);
3019            }            }
3020          else          fprintf(outfile, "  Study time %.4f milliseconds\n",
3021              (((double)time_taken * 1000.0) / (double)timeit) /
3022                (double)CLOCKS_PER_SEC);
3023            }
3024          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3025          if (error != NULL)
3026            fprintf(outfile, "Failed to study: %s\n", error);
3027          else if (extra != NULL)
3028            {
3029            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3030            if (log_store)
3031            {            {
3032            if (isprint(first_char))            size_t jitsize;
3033              fprintf(outfile, "First char = \'%c\'\n", first_char);            if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3034            else                jitsize != 0)
3035              fprintf(outfile, "First char = %d\n", first_char);              fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3036            }            }
3037          }          }
3038        }        }
3039    
3040      /* If /S was present, study the regexp to generate additional info to      /* If /K was present, we set up for handling MARK data. */
     help with the matching. */  
3041    
3042      if (do_study)      if (do_mark)
3043        {        {
3044        if (timeit)        if (extra == NULL)
3045          {          {
3046          register int i;          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3047          clock_t time_taken;          extra->flags = 0;
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
3048          }          }
3049          extra->mark = &markptr;
3050          extra->flags |= PCRE_EXTRA_MARK;
3051          }
3052    
3053        extra = pcre_study(re, study_options, &error);      /* Extract and display information from the compiled data if required. */
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
3054    
3055        /* This looks at internal information. A bit kludgy to do it this      SHOW_INFO:
       way, but it is useful for testing. */  
3056    
3057        else if (do_showinfo)      if (do_debug)
3058          {
3059          fprintf(outfile, "------------------------------------------------------------------\n");
3060          PCRE_PRINTINT(re, outfile, debug_lengths);
3061          }
3062    
3063        /* We already have the options in get_options (see above) */
3064    
3065        if (do_showinfo)
3066          {
3067          unsigned long int all_options;
3068          int count, backrefmax, first_char, need_char, okpartial, jchanged,
3069            hascrorlf;
3070          int nameentrysize, namecount;
3071          const pcre_uint8 *nametable;
3072    
3073          if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3074              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3075              new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3076              new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3077              new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3078              new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3079              new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3080              new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3081              new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3082              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3083              new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3084              != 0)
3085            goto SKIP_DATA;
3086    
3087          if (size != regex_gotten_store) fprintf(outfile,
3088            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3089            (int)size, (int)regex_gotten_store);
3090    
3091          fprintf(outfile, "Capturing subpattern count = %d\n", count);
3092          if (backrefmax > 0)
3093            fprintf(outfile, "Max back reference = %d\n", backrefmax);
3094    
3095          if (namecount > 0)
3096            {
3097            fprintf(outfile, "Named capturing subpatterns:\n");
3098            while (namecount-- > 0)
3099              {
3100    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3101              int imm2_size = use_pcre16 ? 1 : 2;
3102    #else
3103              int imm2_size = IMM2_SIZE;
3104    #endif
3105              int length = (int)STRLEN(nametable + imm2_size);
3106              fprintf(outfile, "  ");
3107              PCHARSV(nametable, imm2_size, length, outfile);
3108              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3109    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3110              fprintf(outfile, "%3d\n", use_pcre16?
3111                 (int)(((PCRE_SPTR16)nametable)[0])
3112                :((int)nametable[0] << 8) | (int)nametable[1]);
3113              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3114    #else
3115              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3116    #ifdef SUPPORT_PCRE8
3117              nametable += nameentrysize;
3118    #else
3119              nametable += nameentrysize * 2;
3120    #endif
3121    #endif
3122              }
3123            }
3124    
3125          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3126          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3127    
3128          all_options = ((REAL_PCRE *)re)->options;
3129          if (do_flip) all_options = swap_uint32(all_options);
3130    
3131          if (get_options == 0) fprintf(outfile, "No options\n");
3132            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3133              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3134              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3135              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3136              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3137              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3138              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3139              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3140              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3141              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3142              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3143              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3144              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3145              ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3146              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3147              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3148              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3149              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3150    
3151          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3152    
3153          switch (get_options & PCRE_NEWLINE_BITS)
3154            {
3155            case PCRE_NEWLINE_CR:
3156            fprintf(outfile, "Forced newline sequence: CR\n");
3157            break;
3158    
3159            case PCRE_NEWLINE_LF:
3160            fprintf(outfile, "Forced newline sequence: LF\n");
3161            break;
3162    
3163            case PCRE_NEWLINE_CRLF:
3164            fprintf(outfile, "Forced newline sequence: CRLF\n");
3165            break;
3166    
3167            case PCRE_NEWLINE_ANYCRLF:
3168            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3169            break;
3170    
3171            case PCRE_NEWLINE_ANY:
3172            fprintf(outfile, "Forced newline sequence: ANY\n");
3173            break;
3174    
3175            default:
3176            break;
3177            }
3178    
3179          if (first_char == -1)
3180            {
3181            fprintf(outfile, "First char at start or follows newline\n");
3182            }
3183          else if (first_char < 0)
3184            {
3185            fprintf(outfile, "No first char\n");
3186            }
3187          else
3188            {
3189            const char *caseless =
3190              ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3191              "" : " (caseless)";
3192    
3193            if (PRINTOK(first_char))
3194              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3195            else
3196              {
3197              fprintf(outfile, "First char = ");
3198              pchar(first_char, outfile);
3199              fprintf(outfile, "%s\n", caseless);
3200              }
3201            }
3202    
3203          if (need_char < 0)
3204            {
3205            fprintf(outfile, "No need char\n");
3206            }
3207          else
3208            {
3209            const char *caseless =
3210              ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3211              "" : " (caseless)";
3212    
3213            if (PRINTOK(need_char))
3214              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3215            else
3216              {
3217              fprintf(outfile, "Need char = ");
3218              pchar(need_char, outfile);
3219              fprintf(outfile, "%s\n", caseless);
3220              }
3221            }
3222    
3223          /* Don't output study size; at present it is in any case a fixed
3224          value, but it varies, depending on the computer architecture, and
3225          so messes up the test suite. (And with the /F option, it might be
3226          flipped.) If study was forced by an external -s, don't show this
3227          information unless -i or -d was also present. This means that, except
3228          when auto-callouts are involved, the output from runs with and without
3229          -s should be identical. */
3230    
3231          if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3232          {          {
3233          real_pcre_extra *xx = (real_pcre_extra *)extra;          if (extra == NULL)
3234          if ((xx->options & PCRE_STUDY_MAPPED) == 0)            fprintf(outfile, "Study returned NULL\n");
           fprintf(outfile, "No starting character set\n");  
3235          else          else
3236            {            {
3237            int i;            pcre_uint8 *start_bits = NULL;
3238            int c = 24;            int minlength;
3239            fprintf(outfile, "Starting character set: ");  
3240            for (i = 0; i < 256; i++)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3241                fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3242    
3243              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3244              {              {
3245              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (start_bits == NULL)
3246                  fprintf(outfile, "No set of starting bytes\n");
3247                else
3248                {                {
3249                if (c > 75)                int i;
3250                  {                int c = 24;
3251                  fprintf(outfile, "\n  ");                fprintf(outfile, "Starting byte set: ");
3252                  c = 2;                for (i = 0; i < 256; i++)
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
3253                  {                  {
3254                  fprintf(outfile, "\\x%02x ", i);                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
3255                  c += 5;                    {
3256                      if (c > 75)
3257                        {
3258                        fprintf(outfile, "\n  ");
3259                        c = 2;
3260                        }
3261                      if (PRINTOK(i) && i != ' ')
3262                        {
3263                        fprintf(outfile, "%c ", i);
3264                        c += 2;
3265                        }
3266                      else
3267                        {
3268                        fprintf(outfile, "\\x%02x ", i);
3269                        c += 5;
3270                        }
3271                      }
3272                  }                  }
3273                  fprintf(outfile, "\n");
3274                }                }
3275              }              }
3276            fprintf(outfile, "\n");            }
3277    
3278            /* Show this only if the JIT was set by /S, not by -s. */
3279    
3280            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3281              {
3282              int jit;
3283              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3284                {
3285                if (jit)
3286                  fprintf(outfile, "JIT study was successful\n");
3287                else
3288    #ifdef SUPPORT_JIT
3289                  fprintf(outfile, "JIT study was not successful\n");
3290    #else
3291                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3292    #endif
3293                }
3294            }            }
3295          }          }
3296        }        }
3297      }  
3298        /* If the '>' option was present, we write out the regex to a file, and
3299        that is all. The first 8 bytes of the file are the regex length and then
3300        the study length, in big-endian order. */
3301    
3302        if (to_file != NULL)
3303          {
3304          FILE *f = fopen((char *)to_file, "wb");
3305          if (f == NULL)
3306            {
3307            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3308            }
3309          else
3310            {
3311            pcre_uint8 sbuf[8];
3312    
3313            if (do_flip) regexflip(re, extra);
3314            sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3315            sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3316            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3317            sbuf[3] = (pcre_uint8)((true_size) & 255);
3318            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3319            sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3320            sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3321            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3322    
3323            if (fwrite(sbuf, 1, 8, f) < 8 ||
3324                fwrite(re, 1, true_size, f) < true_size)
3325              {
3326              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3327              }
3328            else
3329              {
3330              fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3331    
3332              /* If there is study data, write it. */
3333    
3334              if (extra != NULL)
3335                {
3336                if (fwrite(extra->study_data, 1, true_study_size, f) <
3337                    true_study_size)
3338                  {
3339                  fprintf(outfile, "Write error on %s: %s\n", to_file,
3340                    strerror(errno));
3341                  }
3342                else fprintf(outfile, "Study data written to %s\n", to_file);
3343                }
3344              }
3345            fclose(f);
3346            }
3347    
3348          new_free(re);
3349          if (extra != NULL)
3350            {
3351            PCRE_FREE_STUDY(extra);
3352            }
3353          if (locale_set)
3354            {
3355            new_free((void *)tables);
3356            setlocale(LC_CTYPE, "C");
3357            locale_set = 0;
3358            }
3359          continue;  /* With next regex */
3360          }
3361        }        /* End of non-POSIX compile */
3362    
3363    /* Read data lines and test them */    /* Read data lines and test them */
3364    
3365    for (;;)    for (;;)
3366      {      {
3367      unsigned char *q;      pcre_uint8 *q;
3368        pcre_uint8 *bptr;
3369        int *use_offsets = offsets;
3370        int use_size_offsets = size_offsets;
3371        int callout_data = 0;
3372        int callout_data_set = 0;
3373      int count, c;      int count, c;
3374      int copystrings = 0;      int copystrings = 0;
3375        int find_match_limit = default_find_match_limit;
3376      int getstrings = 0;      int getstrings = 0;
3377      int getlist = 0;      int getlist = 0;
3378      int offsets[45];      int gmatched = 0;
3379      int size_offsets = sizeof(offsets)/sizeof(int);      int start_offset = 0;
3380        int start_offset_sign = 1;
3381        int g_notempty = 0;
3382        int use_dfa = 0;
3383    
3384        *copynames = 0;
3385        *getnames = 0;
3386    
3387    #ifdef SUPPORT_PCRE16
3388        cn16ptr = copynames;
3389        gn16ptr = getnames;
3390    #endif
3391    #ifdef SUPPORT_PCRE8
3392        cn8ptr = copynames8;
3393        gn8ptr = getnames8;
3394    #endif
3395    
3396        SET_PCRE_CALLOUT(callout);
3397        first_callout = 1;
3398        last_callout_mark = NULL;
3399        callout_extra = 0;
3400        callout_count = 0;
3401        callout_fail_count = 999999;
3402        callout_fail_id = -1;
3403        show_malloc = 0;
3404      options = 0;      options = 0;
3405    
3406      if (infile == stdin) printf("  data> ");      if (extra != NULL) extra->flags &=
3407      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3408    
3409        len = 0;
3410        for (;;)
3411        {        {
3412        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3413        goto CONTINUE;          {
3414            if (len > 0)    /* Reached EOF without hitting a newline */
3415              {
3416              fprintf(outfile, "\n");
3417              break;
3418              }
3419            done = 1;
3420            goto CONTINUE;
3421            }
3422          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3423          len = (int)strlen((char *)buffer);
3424          if (buffer[len-1] == '\n') break;
3425        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
3426    
     len = (int)strlen((char *)buffer);  
3427      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
3428      buffer[len] = 0;      buffer[len] = 0;
3429      if (len == 0) break;      if (len == 0) break;
# Line 686  while (!done) Line 3431  while (!done)
3431      p = buffer;      p = buffer;
3432      while (isspace(*p)) p++;      while (isspace(*p)) p++;
3433    
3434      q = dbuffer;      bptr = q = dbuffer;
3435      while ((c = *p++) != 0)      while ((c = *p++) != 0)
3436        {        {
3437        int i = 0;        int i = 0;
3438        int n = 0;        int n = 0;
3439        if (c == '\\') switch ((c = *p++))  
3440          /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3441          In non-UTF mode, allow the value of the byte to fall through to later,
3442          where values greater than 127 are turned into UTF-8 when running in
3443          16-bit mode. */
3444    
3445          if (c != '\\')
3446            {
3447            if (use_utf)
3448              {
3449              *q++ = c;
3450              continue;
3451              }
3452            }
3453    
3454          /* Handle backslash escapes */
3455    
3456          else switch ((c = *p++))
3457          {          {
3458          case 'a': c =    7; break;          case 'a': c =    7; break;
3459          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 709  while (!done) Line 3471  while (!done)
3471            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
3472          break;          break;
3473    
3474          case 'x':          case 'x':
3475          c = 0;          if (*p == '{')
3476          while (i++ < 2 && isxdigit(*p))            {
3477              pcre_uint8 *pt = p;
3478              c = 0;
3479    
3480              /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3481              when isxdigit() is a macro that refers to its argument more than
3482              once. This is banned by the C Standard, but apparently happens in at
3483              least one MacOS environment. */
3484    
3485              for (pt++; isxdigit(*pt); pt++)
3486                {
3487                if (++i == 9)
3488                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3489                                   "using only the first eight.\n");
3490                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3491                }
3492              if (*pt == '}')
3493                {
3494                p = pt + 1;
3495                break;
3496                }
3497              /* Not correct form for \x{...}; fall through */
3498              }