/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 31 by nigel, Sat Feb 24 21:38:57 2007 UTC revision 901 by ph10, Sat Jan 21 15:47:59 2012 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50    #ifdef HAVE_CONFIG_H
51    #include "config.h"
52    #endif
53    
54  #include <ctype.h>  #include <ctype.h>
55  #include <stdio.h>  #include <stdio.h>
56  #include <string.h>  #include <string.h>
57  #include <stdlib.h>  #include <stdlib.h>
58  #include <time.h>  #include <time.h>
59  #include <locale.h>  #include <locale.h>
60    #include <errno.h>
61    
62    #ifdef SUPPORT_LIBREADLINE
63    #ifdef HAVE_UNISTD_H
64    #include <unistd.h>
65    #endif
66    #include <readline/readline.h>
67    #include <readline/history.h>
68    #endif
69    
70    
71    /* A number of things vary for Windows builds. Originally, pcretest opened its
72    input and output without "b"; then I was told that "b" was needed in some
73    environments, so it was added for release 5.0 to both the input and output. (It
74    makes no difference on Unix-like systems.) Later I was told that it is wrong
75    for the input on Windows. I've now abstracted the modes into two macros that
76    are set here, to make it easier to fiddle with them, and removed "b" from the
77    input mode under Windows. */
78    
79    #if defined(_WIN32) || defined(WIN32)
80    #include <io.h>                /* For _setmode() */
81    #include <fcntl.h>             /* For _O_BINARY */
82    #define INPUT_MODE   "r"
83    #define OUTPUT_MODE  "wb"
84    
85    #ifndef isatty
86    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
87    #endif                         /* though in some environments they seem to   */
88                                   /* be already defined, hence the #ifndefs.    */
89    #ifndef fileno
90    #define fileno _fileno
91    #endif
92    
93    /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95    #ifdef __BORLANDC__
96    #define _setmode(handle, mode) setmode(handle, mode)
97    #endif
98    
99    /* Not Windows */
100    
101    #else
102    #include <sys/time.h>          /* These two includes are needed */
103    #include <sys/resource.h>      /* for setrlimit(). */
104    #define INPUT_MODE   "rb"
105    #define OUTPUT_MODE  "wb"
106    #endif
107    
108    
109  /* Use the internal info for displaying the results of pcre_study(). */  /* We have to include pcre_internal.h because we need the internal info for
110    displaying the results of pcre_study() and we also need to know about the
111    internal macros, structures, and other internal data values; pcretest has
112    "inside information" compared to a program that strictly follows the PCRE API.
113    
114    Although pcre_internal.h does itself include pcre.h, we explicitly include it
115    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116    appropriately for an application, not for building PCRE. */
117    
118    #include "pcre.h"
119    
120    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121    /* Configure internal macros to 16 bit mode. */
122    #define COMPILE_PCRE16
123    #endif
124    
125    #include "pcre_internal.h"
126    
127    /* The pcre_printint() function, which prints the internal form of a compiled
128    regex, is held in a separate file so that (a) it can be compiled in either
129    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130    when that is compiled in debug mode. */
131    
132    #ifdef SUPPORT_PCRE8
133    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134    #endif
135    #ifdef SUPPORT_PCRE16
136    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137    #endif
138    
139  #include "internal.h"  /* We need access to some of the data tables that PCRE uses. So as not to have
140    to keep two copies, we include the source file here, changing the names of the
141    external symbols to prevent clashes. */
142    
143    #define PCRE_INCLUDED
144    #undef PRIV
145    #define PRIV(name) name
146    
147    #include "pcre_tables.c"
148    
149    /* The definition of the macro PRINTABLE, which determines whether to print an
150    output character as-is or as a hex value when showing compiled patterns, is
151    the same as in the printint.src file. We use it here in cases when the locale
152    has not been explicitly changed, so as to get consistent output from systems
153    that differ in their output from isprint() even in the "C" locale. */
154    
155    #ifdef EBCDIC
156    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157    #else
158    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
159    #endif
160    
161    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162    
163    /* Posix support is disabled in 16 bit only mode. */
164    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165    #define NOPOSIX
166    #endif
167    
168    /* It is possible to compile this test program without including support for
169    testing the POSIX interface, though this is not available via the standard
170    Makefile. */
171    
172    #if !defined NOPOSIX
173  #include "pcreposix.h"  #include "pcreposix.h"
174    #endif
175    
176    /* It is also possible, originally for the benefit of a version that was
177    imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178    NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
179    automatically cut out the UTF support if PCRE is built without it. */
180    
181    #ifndef SUPPORT_UTF
182    #ifndef NOUTF
183    #define NOUTF
184    #endif
185    #endif
186    
187    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189    only from one place and is handled differently). I couldn't dream up any way of
190    using a single macro to do this in a generic way, because of the many different
191    argument requirements. We know that at least one of SUPPORT_PCRE8 and
192    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193    use these in the definitions of generic macros.
194    
195    **** Special note about the PCHARSxxx macros: the address of the string to be
196    printed is always given as two arguments: a base address followed by an offset.
197    The base address is cast to the correct data size for 8 or 16 bit data; the
198    offset is in units of this size. If the string were given as base+offset in one
199    argument, the casting might be incorrectly applied. */
200    
201    #ifdef SUPPORT_PCRE8
202    
203    #define PCHARS8(lv, p, offset, len, f) \
204      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205    
206    #define PCHARSV8(p, offset, len, f) \
207      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208    
209    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210      p = read_capture_name8(p, cn8, re)
211    
212    #define STRLEN8(p) ((int)strlen((char *)p))
213    
214    #define SET_PCRE_CALLOUT8(callout) \
215      pcre_callout = callout
216    
217    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
218       pcre_assign_jit_stack(extra, callback, userdata)
219    
220    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
221      re = pcre_compile((char *)pat, options, error, erroffset, tables)
222    
223    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
224        namesptr, cbuffer, size) \
225      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
226        (char *)namesptr, cbuffer, size)
227    
228    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
229      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
230    
231    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
232        offsets, size_offsets, workspace, size_workspace) \
233      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
234        offsets, size_offsets, workspace, size_workspace)
235    
236    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
237        offsets, size_offsets) \
238      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
239        offsets, size_offsets)
240    
241    #define PCRE_FREE_STUDY8(extra) \
242      pcre_free_study(extra)
243    
244    #define PCRE_FREE_SUBSTRING8(substring) \
245      pcre_free_substring(substring)
246    
247    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
248      pcre_free_substring_list(listptr)
249    
250    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251        getnamesptr, subsptr) \
252      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
253        (char *)getnamesptr, subsptr)
254    
255    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
256      n = pcre_get_stringnumber(re, (char *)ptr)
257    
258    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
259      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
260    
261    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
262      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
263    
264    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
265      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
266    
267    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
268      pcre_printint(re, outfile, debug_lengths)
269    
270    #define PCRE_STUDY8(extra, re, options, error) \
271      extra = pcre_study(re, options, error)
272    
273    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
274      pcre_jit_stack_alloc(startsize, maxsize)
275    
276    #define PCRE_JIT_STACK_FREE8(stack) \
277      pcre_jit_stack_free(stack)
278    
279    #endif /* SUPPORT_PCRE8 */
280    
281    /* -----------------------------------------------------------*/
282    
283    #ifdef SUPPORT_PCRE16
284    
285    #define PCHARS16(lv, p, offset, len, f) \
286      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
287    
288    #define PCHARSV16(p, offset, len, f) \
289      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
290    
291    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
292      p = read_capture_name16(p, cn16, re)
293    
294    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
295    
296    #define SET_PCRE_CALLOUT16(callout) \
297      pcre16_callout = (int (*)(pcre16_callout_block *))callout
298    
299    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
300      pcre16_assign_jit_stack((pcre16_extra *)extra, \
301        (pcre16_jit_callback)callback, userdata)
302    
303    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
304      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
305        tables)
306    
307    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
308        namesptr, cbuffer, size) \
309      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
310        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
311    
312    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
313      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
314        (PCRE_UCHAR16 *)cbuffer, size/2)
315    
316    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
317        offsets, size_offsets, workspace, size_workspace) \
318      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
319        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
320        workspace, size_workspace)
321    
322    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
323        offsets, size_offsets) \
324      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
325        len, start_offset, options, offsets, size_offsets)
326    
327    #define PCRE_FREE_STUDY16(extra) \
328      pcre16_free_study((pcre16_extra *)extra)
329    
330    #define PCRE_FREE_SUBSTRING16(substring) \
331      pcre16_free_substring((PCRE_SPTR16)substring)
332    
333    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
334      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
335    
336    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
337        getnamesptr, subsptr) \
338      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
339        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
340    
341    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
342      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
343    
344    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
345      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
346        (PCRE_SPTR16 *)(void*)subsptr)
347    
348    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
349      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
350        (PCRE_SPTR16 **)(void*)listptr)
351    
352    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
353      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
354        tables)
355    
356    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
357      pcre16_printint(re, outfile, debug_lengths)
358    
359    #define PCRE_STUDY16(extra, re, options, error) \
360      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
361    
362    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
363      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
364    
365    #define PCRE_JIT_STACK_FREE16(stack) \
366      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
367    
368    #endif /* SUPPORT_PCRE16 */
369    
370    
371    /* ----- Both modes are supported; a runtime test is needed, except for
372    pcre_config(), and the JIT stack functions, when it doesn't matter which
373    version is called. ----- */
374    
375    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
376    
377    #define CHAR_SIZE (use_pcre16? 2:1)
378    
379    #define PCHARS(lv, p, offset, len, f) \
380      if (use_pcre16) \
381        PCHARS16(lv, p, offset, len, f); \
382      else \
383        PCHARS8(lv, p, offset, len, f)
384    
385    #define PCHARSV(p, offset, len, f) \
386      if (use_pcre16) \
387        PCHARSV16(p, offset, len, f); \
388      else \
389        PCHARSV8(p, offset, len, f)
390    
391    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
392      if (use_pcre16) \
393        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
394      else \
395        READ_CAPTURE_NAME8(p, cn8, cn16, re)
396    
397    #define SET_PCRE_CALLOUT(callout) \
398      if (use_pcre16) \
399        SET_PCRE_CALLOUT16(callout); \
400      else \
401        SET_PCRE_CALLOUT8(callout)
402    
403    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
404    
405    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
406      if (use_pcre16) \
407        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
408      else \
409        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
410    
411    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
412      if (use_pcre16) \
413        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
414      else \
415        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
416    
417    #define PCRE_CONFIG pcre_config
418    
419    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
420        namesptr, cbuffer, size) \
421      if (use_pcre16) \
422        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
423          namesptr, cbuffer, size); \
424      else \
425        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
426          namesptr, cbuffer, size)
427    
428    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
429      if (use_pcre16) \
430        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
431      else \
432        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
433    
434    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
435        offsets, size_offsets, workspace, size_workspace) \
436      if (use_pcre16) \
437        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
438          offsets, size_offsets, workspace, size_workspace); \
439      else \
440        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
441          offsets, size_offsets, workspace, size_workspace)
442    
443    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
444        offsets, size_offsets) \
445      if (use_pcre16) \
446        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
447          offsets, size_offsets); \
448      else \
449        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
450          offsets, size_offsets)
451    
452    #define PCRE_FREE_STUDY(extra) \
453      if (use_pcre16) \
454        PCRE_FREE_STUDY16(extra); \
455      else \
456        PCRE_FREE_STUDY8(extra)
457    
458    #define PCRE_FREE_SUBSTRING(substring) \
459      if (use_pcre16) \
460        PCRE_FREE_SUBSTRING16(substring); \
461      else \
462        PCRE_FREE_SUBSTRING8(substring)
463    
464    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
465      if (use_pcre16) \
466        PCRE_FREE_SUBSTRING_LIST16(listptr); \
467      else \
468        PCRE_FREE_SUBSTRING_LIST8(listptr)
469    
470    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
471        getnamesptr, subsptr) \
472      if (use_pcre16) \
473        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
474          getnamesptr, subsptr); \
475      else \
476        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
477          getnamesptr, subsptr)
478    
479    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
480      if (use_pcre16) \
481        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
482      else \
483        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
484    
485    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
486      if (use_pcre16) \
487        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
488      else \
489        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
490    
491    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
492      if (use_pcre16) \
493        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
494      else \
495        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
496    
497    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
498      (use_pcre16 ? \
499         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
500        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
501    
502    #define PCRE_JIT_STACK_FREE(stack) \
503      if (use_pcre16) \
504        PCRE_JIT_STACK_FREE16(stack); \
505      else \
506        PCRE_JIT_STACK_FREE8(stack)
507    
508    #define PCRE_MAKETABLES \
509      (use_pcre16? pcre16_maketables() : pcre_maketables())
510    
511    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
512      if (use_pcre16) \
513        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
514      else \
515        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
516    
517    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
518      if (use_pcre16) \
519        PCRE_PRINTINT16(re, outfile, debug_lengths); \
520      else \
521        PCRE_PRINTINT8(re, outfile, debug_lengths)
522    
523    #define PCRE_STUDY(extra, re, options, error) \
524      if (use_pcre16) \
525        PCRE_STUDY16(extra, re, options, error); \
526      else \
527        PCRE_STUDY8(extra, re, options, error)
528    
529    /* ----- Only 8-bit mode is supported ----- */
530    
531    #elif defined SUPPORT_PCRE8
532    #define CHAR_SIZE                 1
533    #define PCHARS                    PCHARS8
534    #define PCHARSV                   PCHARSV8
535    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
536    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
537    #define STRLEN                    STRLEN8
538    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
539    #define PCRE_COMPILE              PCRE_COMPILE8
540    #define PCRE_CONFIG               pcre_config
541    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
542    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
543    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
544    #define PCRE_EXEC                 PCRE_EXEC8
545    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
546    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
547    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
548    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
549    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
550    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
551    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
552    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
553    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
554    #define PCRE_MAKETABLES           pcre_maketables()
555    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
556    #define PCRE_PRINTINT             PCRE_PRINTINT8
557    #define PCRE_STUDY                PCRE_STUDY8
558    
559    /* ----- Only 16-bit mode is supported ----- */
560    
561    #else
562    #define CHAR_SIZE                 2
563    #define PCHARS                    PCHARS16
564    #define PCHARSV                   PCHARSV16
565    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
566    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
567    #define STRLEN                    STRLEN16
568    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
569    #define PCRE_COMPILE              PCRE_COMPILE16
570    #define PCRE_CONFIG               pcre16_config
571    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
572    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
573    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
574    #define PCRE_EXEC                 PCRE_EXEC16
575    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
576    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
577    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
578    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
579    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
580    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
581    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
582    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
583    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
584    #define PCRE_MAKETABLES           pcre16_maketables()
585    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
586    #define PCRE_PRINTINT             PCRE_PRINTINT16
587    #define PCRE_STUDY                PCRE_STUDY16
588    #endif
589    
590    /* ----- End of mode-specific function call macros ----- */
591    
592    
593    /* Other parameters */
594    
595  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
596  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 600 
600  #endif  #endif
601  #endif  #endif
602    
603  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
604    
605    #define LOOPREPEAT 500000
606    
607    /* Static variables */
608    
609  static FILE *outfile;  static FILE *outfile;
610  static int log_store = 0;  static int log_store = 0;
611    static int callout_count;
612    static int callout_extra;
613    static int callout_fail_count;
614    static int callout_fail_id;
615    static int debug_lengths;
616    static int first_callout;
617    static int locale_set = 0;
618    static int show_malloc;
619    static int use_utf;
620    static size_t gotten_store;
621    static size_t first_gotten_store = 0;
622    static const unsigned char *last_callout_mark = NULL;
623    
624    /* The buffers grow automatically if very long input lines are encountered. */
625    
626    static int buffer_size = 50000;
627    static pcre_uint8 *buffer = NULL;
628    static pcre_uint8 *dbuffer = NULL;
629    static pcre_uint8 *pbuffer = NULL;
630    
631    /* Another buffer is needed for translation to 16-bit character strings. It
632    will be obtained and extended as required. */
633    
634    #ifdef SUPPORT_PCRE16
635    static int buffer16_size = 0;
636    static pcre_uint16 *buffer16 = NULL;
637    
638    #ifdef SUPPORT_PCRE8
639    
640    /* We need the table of operator lengths that is used for 16-bit compiling, in
641    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
642    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
643    appropriately for the 16-bit world. Just as a safety check, make sure that
644    COMPILE_PCRE16 is *not* set. */
645    
646    #ifdef COMPILE_PCRE16
647    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
648    #endif
649    
650    #if LINK_SIZE == 2
651    #undef LINK_SIZE
652    #define LINK_SIZE 1
653    #elif LINK_SIZE == 3 || LINK_SIZE == 4
654    #undef LINK_SIZE
655    #define LINK_SIZE 2
656    #else
657    #error LINK_SIZE must be either 2, 3, or 4
658    #endif
659    
660    #undef IMM2_SIZE
661    #define IMM2_SIZE 1
662    
663    #endif /* SUPPORT_PCRE8 */
664    
665    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
666    #endif  /* SUPPORT_PCRE16 */
667    
668    /* If we have 8-bit support, default use_pcre16 to false; if there is also
669    16-bit support, it can be changed by an option. If there is no 8-bit support,
670    there must be 16-bit support, so default it to 1. */
671    
672    #ifdef SUPPORT_PCRE8
673    static int use_pcre16 = 0;
674    #else
675    static int use_pcre16 = 1;
676    #endif
677    
678    /* Textual explanations for runtime error codes */
679    
680    static const char *errtexts[] = {
681      NULL,  /* 0 is no error */
682      NULL,  /* NOMATCH is handled specially */
683      "NULL argument passed",
684      "bad option value",
685      "magic number missing",
686      "unknown opcode - pattern overwritten?",
687      "no more memory",
688      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
689      "match limit exceeded",
690      "callout error code",
691      NULL,  /* BADUTF8/16 is handled specially */
692      NULL,  /* BADUTF8/16 offset is handled specially */
693      NULL,  /* PARTIAL is handled specially */
694      "not used - internal error",
695      "internal error - pattern overwritten?",
696      "bad count value",
697      "item unsupported for DFA matching",
698      "backreference condition or recursion test not supported for DFA matching",
699      "match limit not supported for DFA matching",
700      "workspace size exceeded in DFA matching",
701      "too much recursion for DFA matching",
702      "recursion limit exceeded",
703      "not used - internal error",
704      "invalid combination of newline options",
705      "bad offset value",
706      NULL,  /* SHORTUTF8/16 is handled specially */
707      "nested recursion at the same subject position",
708      "JIT stack limit reached",
709      "pattern compiled in wrong mode: 8-bit/16-bit error"
710    };
711    
712    
/*************************************************
*         Alternate character tables             *
*************************************************/

/* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
using the default tables of the library. However, the T option can be used to
select alternate sets of tables, for different kinds of testing. Note also that
the L (locale) option also adjusts the tables. */

/* This is the set of tables distributed as default with PCRE. It recognizes
only ASCII characters. The array is four tables laid end to end: 256 bytes of
lower casing, 256 bytes of case flipping, ten 32-byte class bitmaps, and 256
bytes of character type flags. */

static const pcre_uint8 tables0[] = {

/* This table is a lower casing table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

/* space: characters 9-13 and 32 */

  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* xdigit: 0-9, A-F, a-f */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* digit: 0-9 */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* upper: A-Z */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* lower: a-z */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* word: 0-9, A-Z, a-z and underscore */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* graph: characters 33-126 */

  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* print: characters 32-126 */

  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* punct */

  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* cntrl: characters 0-31 and 127 */

  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
893    
/* This is a set of tables that came originally from a Windows user. It seems
to be at least an approximation of ISO 8859. In particular, there are
characters greater than 128 that are marked as spaces, letters, etc. Judging by
the values, the four sections come in the order lower casing (256 bytes), case
flipping (256 bytes), class bitmaps (320 bytes) and character types (256
bytes). */

static const pcre_uint8 tables1[] = {

/* Lower casing table. Besides A-Z, codes 192-222 (except 215) map to the code
32 higher. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Class bitmaps: ten maps of 32 bytes (four lines) each. The class order is
presumably the usual PCRE one - space, xdigit, digit, upper, lower, word,
graph, print, punct, cntrl - which the ASCII halves of the maps agree with. */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type flags, one byte per character code, eight codes per line.
The values are in decimal here: 1 = white space, 2 = letter, 4 = decimal digit,
8 = hexadecimal digit, 16 = alphanumeric or '_', 128 = regular expression
metacharacter or binary zero. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1036    
1037    
1038  static void print_internals(pcre *re, FILE *outfile)  
1039    
1040    #ifndef HAVE_STRERROR
1041    /*************************************************
1042    *     Provide strerror() for non-ANSI libraries  *
1043    *************************************************/
1044    
1045    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1046    in their libraries, but can provide the same facility by this simple
1047    alternative function. */
1048    
1049    extern int   sys_nerr;
1050    extern char *sys_errlist[];
1051    
1052    char *
1053    strerror(int n)
1054  {  {
1055  unsigned char *code = ((real_pcre *)re)->code;  if (n < 0 || n >= sys_nerr) return "unknown error number";
1056    return sys_errlist[n];
1057    }
1058    #endif /* HAVE_STRERROR */
1059    
1060    
1061  fprintf(outfile, "------------------------------------------------------------------\n");  /*************************************************
1062    *         JIT memory callback                    *
1063    *************************************************/
1064    
1065  for(;;)  static pcre_jit_stack* jit_callback(void *arg)
1066    {
1067    return (pcre_jit_stack *)arg;
1068    }
1069    
1070    
1071    #if !defined NOUTF || defined SUPPORT_PCRE16
1072    /*************************************************
1073    *            Convert UTF-8 string to value       *
1074    *************************************************/
1075    
1076    /* This function takes one or more bytes that represents a UTF-8 character,
1077    and returns the value of the character.
1078    
1079    Argument:
1080      utf8bytes   a pointer to the byte vector
1081      vptr        a pointer to an int to receive the value
1082    
1083    Returns:      >  0 => the number of bytes consumed
1084                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1085    */
1086    
1087    static int
1088    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089    {
1090    int c = *utf8bytes++;
1091    int d = c;
1092    int i, j, s;
1093    
1094    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1095    {    {
1096    int c;    if ((d & 0x80) == 0) break;
1097    int charlength;    d <<= 1;
1098      }
1099    
1100    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1101    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1102    
1103    /* i now has a value in the range 1-5 */
1104    
1105    s = 6*i;
1106    d = (c & utf8_table3[i]) << s;
1107    
1108    for (j = 0; j < i; j++)
1109      {
1110      c = *utf8bytes++;
1111      if ((c & 0xc0) != 0x80) return -(j+1);
1112      s -= 6;
1113      d |= (c & 0x3f) << s;
1114      }
1115    
1116    /* Check that encoding was the correct unique one */
1117    
1118    for (j = 0; j < utf8_table1_size; j++)
1119      if (d <= utf8_table1[j]) break;
1120    if (j != i) return -(i+1);
1121    
1122    /* Valid value */
1123    
1124    *vptr = d;
1125    return i+1;
1126    }
1127    #endif /* NOUTF || SUPPORT_PCRE16 */
1128    
1129    
1130    
1131    #if !defined NOUTF || defined SUPPORT_PCRE16
1132    /*************************************************
1133    *       Convert character value to UTF-8         *
1134    *************************************************/
1135    
1136    /* This function takes an integer value in the range 0 - 0x7fffffff
1137    and encodes it as a UTF-8 character in 0 to 6 bytes.
1138    
1139    Arguments:
1140      cvalue     the character value
1141      utf8bytes  pointer to buffer for result - at least 6 bytes long
1142    
1143    Returns:     number of characters placed in the buffer
1144    */
1145    
1146    static int
1147    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1148    {
1149    register int i, j;
1150    for (i = 0; i < utf8_table1_size; i++)
1151      if (cvalue <= utf8_table1[i]) break;
1152    utf8bytes += i;
1153    for (j = i; j > 0; j--)
1154     {
1155     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1156     cvalue >>= 6;
1157     }
1158    *utf8bytes = utf8_table2[i] | cvalue;
1159    return i + 1;
1160    }
1161    #endif
1162    
1163    
1164    #ifdef SUPPORT_PCRE16
1165    /*************************************************
1166    *         Convert a string to 16-bit             *
1167    *************************************************/
1168    
1169    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1170    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1171    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1172    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1173    result is always left in buffer16.
1174    
1175    Note that this function does not object to surrogate values. This is
1176    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1177    for the purpose of testing that they are correctly faulted.
1178    
1179    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1180    in UTF-8 so that values greater than 255 can be handled.
1181    
1182    Arguments:
1183      data       TRUE if converting a data line; FALSE for a regex
1184      p          points to a byte string
1185      utf        true if UTF-8 (to be converted to UTF-16)
1186      len        number of bytes in the string (excluding trailing zero)
1187    
1188    Returns:     number of 16-bit data items used (excluding trailing zero)
1189                 OR -1 if a UTF-8 string is malformed
1190                 OR -2 if a value > 0x10ffff is encountered
1191                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1192    */
1193    
1194    static int
1195    to16(int data, pcre_uint8 *p, int utf, int len)
1196    {
1197    pcre_uint16 *pp;
1198    
1199    if (buffer16_size < 2*len + 2)
1200      {
1201      if (buffer16 != NULL) free(buffer16);
1202      buffer16_size = 2*len + 2;
1203      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1204      if (buffer16 == NULL)
1205        {
1206        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1207        exit(1);
1208        }
1209      }
1210    
1211    pp = buffer16;
1212    
1213    if (!utf && !data)
1214      {
1215      while (len-- > 0) *pp++ = *p++;
1216      }
1217    
1218    else
1219      {
1220      int c = 0;
1221      while (len > 0)
1222        {
1223        int chlen = utf82ord(p, &c);
1224        if (chlen <= 0) return -1;
1225        if (c > 0x10ffff) return -2;
1226        p += chlen;
1227        len -= chlen;
1228        if (c < 0x10000) *pp++ = c; else
1229          {
1230          if (!utf) return -3;
1231          c -= 0x10000;
1232          *pp++ = 0xD800 | (c >> 10);
1233          *pp++ = 0xDC00 | (c & 0x3ff);
1234          }
1235        }
1236      }
1237    
1238    *pp = 0;
1239    return pp - buffer16;
1240    }
1241    #endif
1242    
1243    
1244    /*************************************************
1245    *        Read or extend an input line            *
1246    *************************************************/
1247    
1248    /* Input lines are read into buffer, but both patterns and data lines can be
1249    continued over multiple input lines. In addition, if the buffer fills up, we
1250    want to automatically expand it so as to be able to handle extremely large
1251    lines that are needed for certain stress tests. When the input buffer is
1252    expanded, the other two buffers must also be expanded likewise, and the
1253    contents of pbuffer, which are a copy of the input for callouts, must be
1254    preserved (for when expansion happens for a data line). This is not the most
1255    optimal way of handling this, but hey, this is just a test program!
1256    
1257    Arguments:
1258      f            the file to read
1259      start        where in buffer to start (this *must* be within buffer)
1260      prompt       for stdin or readline()
1261    
1262    Returns:       pointer to the start of new data
1263                   could be a copy of start, or could be moved
1264                   NULL if no data read and EOF reached
1265    */
1266    
1267    static pcre_uint8 *
1268    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1269    {
1270    pcre_uint8 *here = start;
1271    
1272    for (;;)
1273      {
1274      int rlen = (int)(buffer_size - (here - buffer));
1275    
1276      if (rlen > 1000)
1277        {
1278        int dlen;
1279    
1280        /* If libreadline support is required, use readline() to read a line if the
1281        input is a terminal. Note that readline() removes the trailing newline, so
1282        we must put it back again, to be compatible with fgets(). */
1283    
1284    #ifdef SUPPORT_LIBREADLINE
1285        if (isatty(fileno(f)))
1286          {
1287          size_t len;
1288          char *s = readline(prompt);
1289          if (s == NULL) return (here == start)? NULL : start;
1290          len = strlen(s);
1291          if (len > 0) add_history(s);
1292          if (len > rlen - 1) len = rlen - 1;
1293          memcpy(here, s, len);
1294          here[len] = '\n';
1295          here[len+1] = 0;
1296          free(s);
1297          }
1298        else
1299    #endif
1300    
1301        /* Read the next line by normal means, prompting if the file is stdin. */
1302    
1303          {
1304          if (f == stdin) printf("%s", prompt);
1305          if (fgets((char *)here, rlen,  f) == NULL)
1306            return (here == start)? NULL : start;
1307          }
1308    
1309        dlen = (int)strlen((char *)here);
1310        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1311        here += dlen;
1312        }
1313    
1314      else
1315        {
1316        int new_buffer_size = 2*buffer_size;
1317        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1318        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320    
1321        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322          {
1323          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1324          exit(1);
1325          }
1326    
1327        memcpy(new_buffer, buffer, buffer_size);
1328        memcpy(new_pbuffer, pbuffer, buffer_size);
1329    
1330        buffer_size = new_buffer_size;
1331    
1332        start = new_buffer + (start - buffer);
1333        here = new_buffer + (here - buffer);
1334    
1335        free(buffer);
1336        free(dbuffer);
1337        free(pbuffer);
1338    
1339        buffer = new_buffer;
1340        dbuffer = new_dbuffer;
1341        pbuffer = new_pbuffer;
1342        }
1343      }
1344    
1345    return NULL;  /* Control never gets here */
1346    }
1347    
1348    
1349    
1350    /*************************************************
1351    *          Read number from string               *
1352    *************************************************/
1353    
1354    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1355    around with conditional compilation, just do the job by hand. It is only used
1356    for unpicking arguments, so just keep it simple.
1357    
1358    Arguments:
1359      str           string to be converted
1360      endptr        where to put the end pointer
1361    
1362    Returns:        the unsigned long
1363    */
1364    
1365    static int
1366    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1367    {
1368    int result = 0;
1369    while(*str != 0 && isspace(*str)) str++;
1370    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1371    *endptr = str;
1372    return(result);
1373    }
1374    
1375    
1376    
1377    /*************************************************
1378    *             Print one character                *
1379    *************************************************/
1380    
1381    /* Print a single character either literally, or as a hex escape. */
1382    
1383    static int pchar(int c, FILE *f)
1384    {
1385    if (PRINTOK(c))
1386      {
1387      if (f != NULL) fprintf(f, "%c", c);
1388      return 1;
1389      }
1390    
1391    if (c < 0x100)
1392      {
1393      if (use_utf)
1394        {
1395        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1396        return 6;
1397        }
1398      else
1399        {
1400        if (f != NULL) fprintf(f, "\\x%02x", c);
1401        return 4;
1402        }
1403      }
1404    
1405    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1406    return (c <= 0x000000ff)? 6 :
1407           (c <= 0x00000fff)? 7 :
1408           (c <= 0x0000ffff)? 8 :
1409           (c <= 0x000fffff)? 9 : 10;
1410    }
1411    
1412    
1413    
1414    #ifdef SUPPORT_PCRE8
1415    /*************************************************
1416    *         Print 8-bit character string           *
1417    *************************************************/
1418    
1419    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1420    If handed a NULL file, just counts chars without printing. */
1421    
1422    static int pchars(pcre_uint8 *p, int length, FILE *f)
1423    {
1424    int c = 0;
1425    int yield = 0;
1426    
1427    if (length < 0)
1428      length = strlen((char *)p);
1429    
1430    while (length-- > 0)
1431      {
1432    #if !defined NOUTF
1433      if (use_utf)
1434        {
1435        int rc = utf82ord(p, &c);
1436        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1437          {
1438          length -= rc - 1;
1439          p += rc;
1440          yield += pchar(c, f);
1441          continue;
1442          }
1443        }
1444    #endif
1445      c = *p++;
1446      yield += pchar(c, f);
1447      }
1448    
1449    return yield;
1450    }
1451    #endif
1452    
1453    
1454    
1455    #ifdef SUPPORT_PCRE16
1456    /*************************************************
1457    *    Find length of 0-terminated 16-bit string   *
1458    *************************************************/
1459    
1460    static int strlen16(PCRE_SPTR16 p)
1461    {
1462    int len = 0;
1463    while (*p++ != 0) len++;
1464    return len;
1465    }
1466    #endif  /* SUPPORT_PCRE16 */
1467    
1468    
1469    #ifdef SUPPORT_PCRE16
1470    /*************************************************
1471    *           Print 16-bit character string        *
1472    *************************************************/
1473    
1474    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1475    If handed a NULL file, just counts chars without printing. */
1476    
1477    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1478    {
1479    int yield = 0;
1480    
1481    if (length < 0)
1482      length = strlen16(p);
1483    
1484    while (length-- > 0)
1485      {
1486      int c = *p++ & 0xffff;
1487    #if !defined NOUTF
1488      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1489        {
1490        int d = *p & 0xffff;
1491        if (d >= 0xDC00 && d < 0xDFFF)
1492          {
1493          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1494          length--;
1495          p++;
1496          }
1497        }
1498    #endif
1499      yield += pchar(c, f);
1500      }
1501    
1502    return yield;
1503    }
1504    #endif  /* SUPPORT_PCRE16 */
1505    
1506    
1507    
1508    #ifdef SUPPORT_PCRE8
1509    /*************************************************
1510    *     Read a capture name (8-bit) and check it   *
1511    *************************************************/
1512    
1513    static pcre_uint8 *
1514    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1515    {
1516    pcre_uint8 *npp = *pp;
1517    while (isalnum(*p)) *npp++ = *p++;
1518    *npp++ = 0;
1519    *npp = 0;
1520    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1521      {
1522      fprintf(outfile, "no parentheses with name \"");
1523      PCHARSV(*pp, 0, -1, outfile);
1524      fprintf(outfile, "\"\n");
1525      }
1526    
1527    *pp = npp;
1528    return p;
1529    }
1530    #endif  /* SUPPORT_PCRE8 */
1531    
1532    
1533    
1534    #ifdef SUPPORT_PCRE16
1535    /*************************************************
1536    *     Read a capture name (16-bit) and check it  *
1537    *************************************************/
1538    
1539    /* Note that the text being read is 8-bit. */
1540    
1541    static pcre_uint8 *
1542    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1543    {
1544    pcre_uint16 *npp = *pp;
1545    while (isalnum(*p)) *npp++ = *p++;
1546    *npp++ = 0;
1547    *npp = 0;
1548    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1549      {
1550      fprintf(outfile, "no parentheses with name \"");
1551      PCHARSV(*pp, 0, -1, outfile);
1552      fprintf(outfile, "\"\n");
1553      }
1554    *pp = npp;
1555    return p;
1556    }
1557    #endif  /* SUPPORT_PCRE16 */
1558    
1559    
1560    
1561    /*************************************************
1562    *              Callout function                  *
1563    *************************************************/
1564    
1565    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1566    the match. Yield zero unless more callouts than the fail count, or the callout
1567    data is not zero. */
1568    
1569    static int callout(pcre_callout_block *cb)
1570    {
1571    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1572    int i, pre_start, post_start, subject_length;
1573    
1574    if (callout_extra)
1575      {
1576      fprintf(f, "Callout %d: last capture = %d\n",
1577        cb->callout_number, cb->capture_last);
1578    
1579      for (i = 0; i < cb->capture_top * 2; i += 2)
1580        {
1581        if (cb->offset_vector[i] < 0)
1582          fprintf(f, "%2d: <unset>\n", i/2);
1583        else
1584          {
1585          fprintf(f, "%2d: ", i/2);
1586          PCHARSV(cb->subject, cb->offset_vector[i],
1587            cb->offset_vector[i+1] - cb->offset_vector[i], f);
1588          fprintf(f, "\n");
1589          }
1590        }
1591      }
1592    
1593    /* Re-print the subject in canonical form, the first time or if giving full
1594    datails. On subsequent calls in the same match, we use pchars just to find the
1595    printed lengths of the substrings. */
1596    
1597    if (f != NULL) fprintf(f, "--->");
1598    
1599    PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1600    PCHARS(post_start, cb->subject, cb->start_match,
1601      cb->current_position - cb->start_match, f);
1602    
1603    PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1604    
1605    PCHARSV(cb->subject, cb->current_position,
1606      cb->subject_length - cb->current_position, f);
1607    
1608    if (f != NULL) fprintf(f, "\n");
1609    
1610    /* Always print appropriate indicators, with callout number if not already
1611    shown. For automatic callouts, show the pattern offset. */
1612    
1613    if (cb->callout_number == 255)
1614      {
1615      fprintf(outfile, "%+3d ", cb->pattern_position);
1616      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1617      }
1618    else
1619      {
1620      if (callout_extra) fprintf(outfile, "    ");
1621        else fprintf(outfile, "%3d ", cb->callout_number);
1622      }
1623    
1624    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1625    fprintf(outfile, "^");
1626    
1627    if (post_start > 0)
1628      {
1629      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1630      fprintf(outfile, "^");
1631      }
1632    
1633    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1634      fprintf(outfile, " ");
1635    
1636    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1637      pbuffer + cb->pattern_position);
1638    
1639    fprintf(outfile, "\n");
1640    first_callout = 0;
1641    
1642    if (cb->mark != last_callout_mark)
1643      {
1644      if (cb->mark == NULL)
1645        fprintf(outfile, "Latest Mark: <unset>\n");
1646      else
1647        {
1648        fprintf(outfile, "Latest Mark: ");
1649        PCHARSV(cb->mark, 0, -1, outfile);
1650        putc('\n', outfile);
1651        }
1652      last_callout_mark = cb->mark;
1653      }
1654    
1655    if (cb->callout_data != NULL)
1656      {
1657      int callout_data = *((int *)(cb->callout_data));
1658      if (callout_data != 0)
1659        {
1660        fprintf(outfile, "Callout data = %d\n", callout_data);
1661        return callout_data;
1662        }
1663      }
1664    
1665    return (cb->callout_number != callout_fail_id)? 0 :
1666           (++callout_count >= callout_fail_count)? 1 : 0;
1667    }
1668    
1669    
1670    /*************************************************
1671    *            Local malloc functions              *
1672    *************************************************/
1673    
1674    /* Alternative malloc function, to test functionality and save the size of a
1675    compiled re, which is the first store request that pcre_compile() makes. The
1676    show_malloc variable is set only during matching. */
1677    
1678    static void *new_malloc(size_t size)
1679    {
1680    void *block = malloc(size);
1681    gotten_store = size;
1682    if (first_gotten_store == 0) first_gotten_store = size;
1683    if (show_malloc)
1684      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1685    return block;
1686    }
1687    
1688    static void new_free(void *block)
1689    {
1690    if (show_malloc)
1691      fprintf(outfile, "free             %p\n", block);
1692    free(block);
1693    }
1694    
1695    /* For recursion malloc/free, to test stacking calls */
1696    
1697    static void *stack_malloc(size_t size)
1698    {
1699    void *block = malloc(size);
1700    if (show_malloc)
1701      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1702    return block;
1703    }
1704    
1705    static void stack_free(void *block)
1706    {
1707    if (show_malloc)
1708      fprintf(outfile, "stack_free       %p\n", block);
1709    free(block);
1710    }
1711    
1712    
1713    /*************************************************
1714    *          Call pcre_fullinfo()                  *
1715    *************************************************/
1716    
1717    /* Get one piece of information from the pcre_fullinfo() function. When only
1718    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1719    value, but the code is defensive.
1720    
1721    Arguments:
1722      re        compiled regex
1723      study     study data
1724      option    PCRE_INFO_xxx option
1725      ptr       where to put the data
1726    
1727    Returns:    0 when OK, < 0 on error
1728    */
1729    
1730    static int
1731    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1732    {
1733    int rc;
1734    
1735    if (use_pcre16)
1736    #ifdef SUPPORT_PCRE16
1737      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738    #else
1739      rc = PCRE_ERROR_BADMODE;
1740    #endif
1741    else
1742    #ifdef SUPPORT_PCRE8
1743      rc = pcre_fullinfo(re, study, option, ptr);
1744    #else
1745      rc = PCRE_ERROR_BADMODE;
1746    #endif
1747    
1748    if (rc < 0)
1749      {
1750      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1751        use_pcre16? "16" : "", option);
1752      if (rc == PCRE_ERROR_BADMODE)
1753        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1754          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1755      }
1756    
1757    return rc;
1758    }
1759    
1760    
1761    
1762    /*************************************************
1763    *             Swap byte functions                *
1764    *************************************************/
1765    
1766    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1767    value, respectively.
1768    
1769    Arguments:
1770      value        any number
1771    
1772    Returns:       the byte swapped value
1773    */
1774    
1775    static pcre_uint32
1776    swap_uint32(pcre_uint32 value)
1777    {
1778    return ((value & 0x000000ff) << 24) |
1779           ((value & 0x0000ff00) <<  8) |
1780           ((value & 0x00ff0000) >>  8) |
1781           (value >> 24);
1782    }
1783    
1784    static pcre_uint16
1785    swap_uint16(pcre_uint16 value)
1786    {
1787    return (value >> 8) | (value << 8);
1788    }
1789    
1790    
1791    
1792    /*************************************************
1793    *        Flip bytes in a compiled pattern        *
1794    *************************************************/
1795    
1796    /* This function is called if the 'F' option was present on a pattern that is
1797    to be written to a file. We flip the bytes of all the integer fields in the
1798    regex data block and the study block. In 16-bit mode this also flips relevant
1799    bytes in the pattern itself. This is to make it possible to test PCRE's
1800    ability to reload byte-flipped patterns, e.g. those compiled on a different
1801    architecture. */
1802    
1803    static void
1804    regexflip(pcre *ere, pcre_extra *extra)
1805    {
1806    REAL_PCRE *re = (REAL_PCRE *)ere;
1807    #ifdef SUPPORT_PCRE16
1808    int op;
1809    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1810    int length = re->name_count * re->name_entry_size;
1811    #ifdef SUPPORT_UTF
1812    BOOL utf = (re->options & PCRE_UTF16) != 0;
1813    BOOL utf16_char = FALSE;
1814    #endif /* SUPPORT_UTF */
1815    #endif /* SUPPORT_PCRE16 */
1816    
1817    /* Always flip the bytes in the main data block and study blocks. */
1818    
1819    re->magic_number = REVERSED_MAGIC_NUMBER;
1820    re->size = swap_uint32(re->size);
1821    re->options = swap_uint32(re->options);
1822    re->flags = swap_uint16(re->flags);
1823    re->top_bracket = swap_uint16(re->top_bracket);
1824    re->top_backref = swap_uint16(re->top_backref);
1825    re->first_char = swap_uint16(re->first_char);
1826    re->req_char = swap_uint16(re->req_char);
1827    re->name_table_offset = swap_uint16(re->name_table_offset);
1828    re->name_entry_size = swap_uint16(re->name_entry_size);
1829    re->name_count = swap_uint16(re->name_count);
1830    
1831    if (extra != NULL)
1832      {
1833      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1834      rsd->size = swap_uint32(rsd->size);
1835      rsd->flags = swap_uint32(rsd->flags);
1836      rsd->minlength = swap_uint32(rsd->minlength);
1837      }
1838    
1839    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1840    in the name table, if present, and then in the pattern itself. */
1841    
1842    #ifdef SUPPORT_PCRE16
1843    if (!use_pcre16) return;
1844    
1845    while(TRUE)
1846      {
1847      /* Swap previous characters. */
1848      while (length-- > 0)
1849        {
1850        *ptr = swap_uint16(*ptr);
1851        ptr++;
1852        }
1853    #ifdef SUPPORT_UTF
1854      if (utf16_char)
1855        {
1856        if ((ptr[-1] & 0xfc00) == 0xd800)
1857          {
1858          /* We know that there is only one extra character in UTF-16. */
1859          *ptr = swap_uint16(*ptr);
1860          ptr++;
1861          }
1862        }
1863      utf16_char = FALSE;
1864    #endif /* SUPPORT_UTF */
1865    
1866    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));    /* Get next opcode. */
1867    
1868    if (*code >= OP_BRA)    length = 0;
1869      {    op = *ptr;
1870      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);    *ptr++ = swap_uint16(op);
     code += 2;  
     }  
1871    
1872    else switch(*code)    switch (op)
1873      {      {
1874      case OP_END:      case OP_END:
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
1875      return;      return;
1876    
1877      case OP_OPT:  #ifdef SUPPORT_UTF
1878      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);      case OP_CHAR:
1879      code++;      case OP_CHARI:
1880      break;      case OP_NOT:
1881        case OP_NOTI:
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
1882      case OP_STAR:      case OP_STAR:
1883      case OP_MINSTAR:      case OP_MINSTAR:
1884      case OP_PLUS:      case OP_PLUS:
1885      case OP_MINPLUS:      case OP_MINPLUS:
1886      case OP_QUERY:      case OP_QUERY:
1887      case OP_MINQUERY:      case OP_MINQUERY:
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
1888      case OP_UPTO:      case OP_UPTO:
1889      case OP_MINUPTO:      case OP_MINUPTO:
1890      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);      case OP_EXACT:
1891        else fprintf(outfile, "    \\x%02x{", c);      case OP_POSSTAR:
1892      if (*code != OP_EXACT) fprintf(outfile, ",");      case OP_POSPLUS:
1893      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_POSQUERY:
1894      if (*code == OP_MINUPTO) fprintf(outfile, "?");      case OP_POSUPTO:
1895      code += 3;      case OP_STARI:
1896      break;      case OP_MINSTARI:
1897        case OP_PLUSI:
1898      case OP_TYPEEXACT:      case OP_MINPLUSI:
1899      case OP_TYPEUPTO:      case OP_QUERYI:
1900      case OP_TYPEMINUPTO:      case OP_MINQUERYI:
1901      fprintf(outfile, "    %s{", OP_names[code[3]]);      case OP_UPTOI:
1902      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");      case OP_MINUPTOI:
1903      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_EXACTI:
1904      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");      case OP_POSSTARI:
1905      code += 3;      case OP_POSPLUSI:
1906      break;      case OP_POSQUERYI:
1907        case OP_POSUPTOI:
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
1908      case OP_NOTSTAR:      case OP_NOTSTAR:
1909      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
1910      case OP_NOTPLUS:      case OP_NOTPLUS:
1911      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
1912      case OP_NOTQUERY:      case OP_NOTQUERY:
1913      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
1914      case OP_NOTUPTO:      case OP_NOTUPTO:
1915      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
1916      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);      case OP_NOTEXACT:
1917        else fprintf(outfile, "    [^\\x%02x]{", c);      case OP_NOTPOSSTAR:
1918      if (*code != OP_NOTEXACT) fprintf(outfile, ",");      case OP_NOTPOSPLUS:
1919      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_NOTPOSQUERY:
1920      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");      case OP_NOTPOSUPTO:
1921      code += 3;      case OP_NOTSTARI:
1922      break;      case OP_NOTMINSTARI:
1923        case OP_NOTPLUSI:
1924        case OP_NOTMINPLUSI:
1925        case OP_NOTQUERYI:
1926        case OP_NOTMINQUERYI:
1927        case OP_NOTUPTOI:
1928        case OP_NOTMINUPTOI:
1929        case OP_NOTEXACTI:
1930        case OP_NOTPOSSTARI:
1931        case OP_NOTPOSPLUSI:
1932        case OP_NOTPOSQUERYI:
1933        case OP_NOTPOSUPTOI:
1934        if (utf) utf16_char = TRUE;
1935    #endif
1936        /* Fall through. */
1937    
1938      case OP_REF:      default:
1939      fprintf(outfile, "    \\%d", *(++code));      length = OP_lengths16[op] - 1;
1940      code++;      break;
     goto CLASS_REF_REPEAT;  
1941    
1942      case OP_CLASS:      case OP_CLASS:
1943        case OP_NCLASS:
1944        /* Skip the character bit map. */
1945        ptr += 32/sizeof(pcre_uint16);
1946        length = 0;
1947        break;
1948    
1949        case OP_XCLASS:
1950        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951        if (LINK_SIZE > 1)
1952          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1953            - (1 + LINK_SIZE + 1));
1954        else
1955          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956    
1957        /* Reverse the size of the XCLASS instance. */
1958        *ptr = swap_uint16(*ptr);
1959        ptr++;
1960        if (LINK_SIZE > 1)
1961        {        {
1962        int i, min, max;        *ptr = swap_uint16(*ptr);
1963        code++;        ptr++;
1964        fprintf(outfile, "    [");        }
1965    
1966        for (i = 0; i < 256; i++)      op = *ptr;
1967          {      *ptr = swap_uint16(op);
1968          if ((code[i/8] & (1 << (i&7))) != 0)      ptr++;
1969            {      if ((op & XCL_MAP) != 0)
1970            int j;        {
1971            for (j = i+1; j < 256; j++)        /* Skip the character bit map. */
1972              if ((code[j/8] & (1 << (j&7))) == 0) break;        ptr += 32/sizeof(pcre_uint16);
1973            if (i == '-' || i == ']') fprintf(outfile, "\\");        length -= 32/sizeof(pcre_uint16);
1974            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);        }
1975            if (--j > i)      break;
1976              {      }
1977              fprintf(outfile, "-");    }
1978              if (j == '-' || j == ']') fprintf(outfile, "\\");  /* Control should never reach here in 16 bit mode. */
1979              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  #endif /* SUPPORT_PCRE16 */
1980              }  }
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
1981    
       CLASS_REF_REPEAT:  
1982    
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
1983    
1984          case OP_CRRANGE:  /*************************************************
1985          case OP_CRMINRANGE:  *        Check match or recursion limit          *
1986          min = (code[1] << 8) + code[2];  *************************************************/
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
1987    
1988          default:  static int
1989          code--;  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1990          }    int start_offset, int options, int *use_offsets, int use_size_offsets,
1991        }    int flag, unsigned long int *limit, int errnumber, const char *msg)
1992      break;  {
1993    int count;
1994    int min = 0;
1995    int mid = 64;
1996    int max = -1;
1997    
1998      /* Anything else is just a one-node item */  extra->flags |= flag;
1999    
2000      default:  for (;;)
2001      fprintf(outfile, "    %s", OP_names[*code]);    {
2002      break;    *limit = mid;
2003    
2004      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2005        use_offsets, use_size_offsets);
2006    
2007      if (count == errnumber)
2008        {
2009        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2010        min = mid;
2011        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2012      }      }
2013    
2014    code++;    else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2015    fprintf(outfile, "\n");                           count == PCRE_ERROR_PARTIAL)
2016        {
2017        if (mid == min + 1)
2018          {
2019          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2020          break;
2021          }
2022        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2023        max = mid;
2024        mid = (min + mid)/2;
2025        }
2026      else break;    /* Some other error */
2027    }    }
2028    
2029    extra->flags &= ~flag;
2030    return count;
2031  }  }
2032    
2033    
2034    
2035  /* Character string printing function. */  /*************************************************
2036    *         Case-independent strncmp() function    *
2037    *************************************************/
2038    
2039    /*
2040    Arguments:
2041      s         first string
2042      t         second string
2043      n         number of characters to compare
2044    
2045    Returns:    < 0, = 0, or > 0, according to the comparison
2046    */
2047    
2048  static void pchars(unsigned char *p, int length)  static int
2049    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2050  {  {
2051  int c;  while (n--)
2052  while (length-- > 0)    {
2053    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    int c = tolower(*s++) - tolower(*t++);
2054      else fprintf(outfile, "\\x%02x", c);    if (c) return c;
2055      }
2056    return 0;
2057  }  }
2058    
2059    
2060    
2061  /* Alternative malloc function, to test functionality and show the size of the  /*************************************************
2062  compiled re. */  *         Check newline indicator                *
2063    *************************************************/
2064    
2065  static void *new_malloc(size_t size)  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2066    a message and return 0 if there is no match.
2067    
2068    Arguments:
2069      p           points after the leading '<'
2070      f           file for error message
2071    
2072    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2073    */
2074    
2075    static int
2076    check_newline(pcre_uint8 *p, FILE *f)
2077    {
2078    if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2079    if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2080    if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2081    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2082    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2083    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2084    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2085    fprintf(f, "Unknown newline type at: <%s\n", p);
2086    return 0;
2087    }
2088    
2089    
2090    
2091    /*************************************************
2092    *             Usage function                     *
2093    *************************************************/
2094    
2095    static void
2096    usage(void)
2097  {  {
2098  if (log_store)  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2099    fprintf(outfile, "Memory allocation request: %d (code space %d)\n",  printf("Input and output default to stdin and stdout.\n");
2100      (int)size, (int)size - offsetof(real_pcre, code[0]));  #ifdef SUPPORT_LIBREADLINE
2101  return malloc(size);  printf("If input is a terminal, readline() is used to read from it.\n");
2102    #else
2103    printf("This version of pcretest is not linked with readline().\n");
2104    #endif
2105    printf("\nOptions:\n");
2106    #ifdef SUPPORT_PCRE16
2107    printf("  -16      use the 16-bit library\n");
2108    #endif
2109    printf("  -b       show compiled code\n");
2110    printf("  -C       show PCRE compile-time options and exit\n");
2111    printf("  -C arg   show a specific compile-time option\n");
2112    printf("           and exit with its value. The arg can be:\n");
2113    printf("     linksize     internal link size [2, 3, 4]\n");
2114    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2115    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2116    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2117    printf("     ucp          Unicode Properties supported [0, 1]\n");
2118    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2119    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2120    printf("  -d       debug: show compiled code and information (-b and -i)\n");
2121    #if !defined NODFA
2122    printf("  -dfa     force DFA matching for all subjects\n");
2123    #endif
2124    printf("  -help    show usage information\n");
2125    printf("  -i       show information about compiled patterns\n"
2126           "  -M       find MATCH_LIMIT minimum for each subject\n"
2127           "  -m       output memory used information\n"
2128           "  -o <n>   set size of offsets vector to <n>\n");
2129    #if !defined NOPOSIX
2130    printf("  -p       use POSIX interface\n");
2131    #endif
2132    printf("  -q       quiet: do not output PCRE version number at start\n");
2133    printf("  -S <n>   set stack size to <n> megabytes\n");
2134    printf("  -s       force each pattern to be studied at basic level\n"
2135           "  -s+      force each pattern to be studied, using JIT if available\n"
2136           "  -t       time compilation and execution\n");
2137    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2138    printf("  -tm      time execution (matching) only\n");
2139    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
2140  }  }
2141    
2142    
2143    
2144    /*************************************************
2145    *                Main Program                    *
2146    *************************************************/
2147    
2148  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
2149  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
2150  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 289  options, followed by a set of test data, Line 2152  options, followed by a set of test data,
2152  int main(int argc, char **argv)  int main(int argc, char **argv)
2153  {  {
2154  FILE *infile = stdin;  FILE *infile = stdin;
2155    const char *version;
2156  int options = 0;  int options = 0;
2157  int study_options = 0;  int study_options = 0;
2158    int default_find_match_limit = FALSE;
2159  int op = 1;  int op = 1;
2160  int timeit = 0;  int timeit = 0;
2161    int timeitm = 0;
2162  int showinfo = 0;  int showinfo = 0;
2163  int showstore = 0;  int showstore = 0;
2164    int force_study = -1;
2165    int force_study_options = 0;
2166    int quiet = 0;
2167    int size_offsets = 45;
2168    int size_offsets_max;
2169    int *offsets = NULL;
2170    #if !defined NOPOSIX
2171  int posix = 0;  int posix = 0;
2172    #endif
2173  int debug = 0;  int debug = 0;
2174  int done = 0;  int done = 0;
2175  unsigned char buffer[30000];  int all_use_dfa = 0;
2176  unsigned char dbuffer[1024];  int yield = 0;
2177    int stack_size;
2178    
2179    pcre_jit_stack *jit_stack = NULL;
2180    
2181    /* These vectors store, end-to-end, a list of zero-terminated captured
2182    substring names, each list itself being terminated by an empty name. Assume
2183    that 1024 is plenty long enough for the few names we'll be testing. It is
2184    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2185    for the actual memory, to ensure alignment. */
2186    
2187    pcre_uint16 copynames[1024];
2188    pcre_uint16 getnames[1024];
2189    
2190    #ifdef SUPPORT_PCRE16
2191    pcre_uint16 *cn16ptr;
2192    pcre_uint16 *gn16ptr;
2193    #endif
2194    
2195    #ifdef SUPPORT_PCRE8
2196    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2197    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2198    pcre_uint8 *cn8ptr;
2199    pcre_uint8 *gn8ptr;
2200    #endif
2201    
2202  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that valgrind will check their misuse when
2203    debugging. They grow automatically when very long lines are read. The 16-bit
2204    buffer (buffer16) is obtained only if needed. */
2205    
2206    buffer = (pcre_uint8 *)malloc(buffer_size);
2207    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2208    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2209    
2210    /* The outfile variable is static so that new_malloc can use it. */
2211    
2212  outfile = stdout;  outfile = stdout;
2213    
2214    /* The following  _setmode() stuff is some Windows magic that tells its runtime
2215    library to translate CRLF into a single LF character. At least, that's what
2216    I've been told: never having used Windows I take this all on trust. Originally
2217    it set 0x8000, but then I was advised that _O_BINARY was better. */
2218    
2219    #if defined(_WIN32) || defined(WIN32)
2220    _setmode( _fileno( stdout ), _O_BINARY );
2221    #endif
2222    
2223    /* Get the version number: both pcre_version() and pcre16_version() give the
2224    same answer. We just need to ensure that we call one that is available. */
2225    
2226    #ifdef SUPPORT_PCRE8
2227    version = pcre_version();
2228    #else
2229    version = pcre16_version();
2230    #endif
2231    
2232  /* Scan options */  /* Scan options */
2233    
2234  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2235    {    {
2236    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    pcre_uint8 *endptr;
2237      showstore = 1;  
2238    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    if (strcmp(argv[op], "-m") == 0) showstore = 1;
2239      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2240      else if (strcmp(argv[op], "-s+") == 0)
2241        {
2242        force_study = 1;
2243        force_study_options = PCRE_STUDY_JIT_COMPILE;
2244        }
2245      else if (strcmp(argv[op], "-16") == 0)
2246        {
2247    #ifdef SUPPORT_PCRE16
2248        use_pcre16 = 1;
2249    #else
2250        printf("** This version of PCRE was built without 16-bit support\n");
2251        exit(1);
2252    #endif
2253        }
2254      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2255      else if (strcmp(argv[op], "-b") == 0) debug = 1;
2256    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2257    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2258      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2259    #if !defined NODFA
2260      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2261    #endif
2262      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2263          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2264            *endptr == 0))
2265        {
2266        op++;
2267        argc--;
2268        }
2269      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2270        {
2271        int both = argv[op][2] == 0;
2272        int temp;
2273        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2274                         *endptr == 0))
2275          {
2276          timeitm = temp;
2277          op++;
2278          argc--;
2279          }
2280        else timeitm = LOOPREPEAT;
2281        if (both) timeit = timeitm;
2282        }
2283      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2284          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2285            *endptr == 0))
2286        {
2287    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2288        printf("PCRE: -S not supported on this OS\n");
2289        exit(1);
2290    #else
2291        int rc;
2292        struct rlimit rlim;
2293        getrlimit(RLIMIT_STACK, &rlim);
2294        rlim.rlim_cur = stack_size * 1024 * 1024;
2295        rc = setrlimit(RLIMIT_STACK, &rlim);
2296        if (rc != 0)
2297          {
2298        printf("PCRE: setrlimit() failed with error %d\n", rc);
2299        exit(1);
2300          }
2301        op++;
2302        argc--;
2303    #endif
2304        }
2305    #if !defined NOPOSIX
2306    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
2307    #endif
2308      else if (strcmp(argv[op], "-C") == 0)
2309        {
2310        int rc;
2311        unsigned long int lrc;
2312    
2313        if (argc > 2)
2314          {
2315          if (strcmp(argv[op + 1], "linksize") == 0)
2316            {
2317            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2318            printf("%d\n", rc);
2319            yield = rc;
2320            goto EXIT;
2321            }
2322          if (strcmp(argv[op + 1], "pcre8") == 0)
2323            {
2324    #ifdef SUPPORT_PCRE8
2325            printf("1\n");
2326            yield = 1;
2327    #else
2328            printf("0\n");
2329            yield = 0;
2330    #endif
2331            goto EXIT;
2332            }
2333          if (strcmp(argv[op + 1], "pcre16") == 0)
2334            {
2335    #ifdef SUPPORT_PCRE16
2336            printf("1\n");
2337            yield = 1;
2338    #else
2339            printf("0\n");
2340            yield = 0;
2341    #endif
2342            goto EXIT;
2343            }
2344          if (strcmp(argv[op + 1], "utf") == 0)
2345            {
2346    #ifdef SUPPORT_PCRE8
2347            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2348            printf("%d\n", rc);
2349            yield = rc;
2350    #else
2351            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2352            printf("%d\n", rc);
2353            yield = rc;
2354    #endif
2355            goto EXIT;
2356            }
2357          if (strcmp(argv[op + 1], "ucp") == 0)
2358            {
2359            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2360            printf("%d\n", rc);
2361            yield = rc;
2362            goto EXIT;
2363            }
2364          if (strcmp(argv[op + 1], "jit") == 0)
2365            {
2366            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2367            printf("%d\n", rc);
2368            yield = rc;
2369            goto EXIT;
2370            }
2371          if (strcmp(argv[op + 1], "newline") == 0)
2372            {
2373            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2374            /* Note that these values are always the ASCII values, even
2375            in EBCDIC environments. CR is 13 and NL is 10. */
2376            printf("%s\n", (rc == 13)? "CR" :
2377              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2378              (rc == -2)? "ANYCRLF" :
2379              (rc == -1)? "ANY" : "???");
2380            goto EXIT;
2381            }
2382          printf("Unknown -C option: %s\n", argv[op + 1]);
2383          goto EXIT;
2384          }
2385    
2386        printf("PCRE version %s\n", version);
2387        printf("Compiled with\n");
2388    
2389    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2390    are set, either both UTFs are supported or both are not supported. */
2391    
2392    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2393        printf("  8-bit and 16-bit support\n");
2394        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2395        if (rc)
2396          printf("  UTF-8 and UTF-16 support\n");
2397        else
2398          printf("  No UTF-8 or UTF-16 support\n");
2399    #elif defined SUPPORT_PCRE8
2400        printf("  8-bit support only\n");
2401        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2402        printf("  %sUTF-8 support\n", rc? "" : "No ");
2403    #else
2404        printf("  16-bit support only\n");
2405        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2406        printf("  %sUTF-16 support\n", rc? "" : "No ");
2407    #endif
2408    
2409        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2410        printf("  %sUnicode properties support\n", rc? "" : "No ");
2411        (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2412        if (rc)
2413          {
2414          const char *arch;
2415          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, &arch);
2416          printf("  Just-in-time compiler support: %s\n", arch);
2417          }
2418        else
2419          printf("  No just-in-time compiler support\n");
2420        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2421        /* Note that these values are always the ASCII values, even
2422        in EBCDIC environments. CR is 13 and NL is 10. */
2423        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2424          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2425          (rc == -2)? "ANYCRLF" :
2426          (rc == -1)? "ANY" : "???");
2427        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2428        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2429                                         "all Unicode newlines");
2430        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2431        printf("  Internal link size = %d\n", rc);
2432        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2433        printf("  POSIX malloc threshold = %d\n", rc);
2434        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2435        printf("  Default match limit = %ld\n", lrc);
2436        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2437        printf("  Default recursion depth limit = %ld\n", lrc);
2438        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2439        printf("  Match recursion uses %s", rc? "stack" : "heap");
2440        if (showstore)
2441          {
2442          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2443          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2444          }
2445        printf("\n");
2446        goto EXIT;
2447        }
2448      else if (strcmp(argv[op], "-help") == 0 ||
2449               strcmp(argv[op], "--help") == 0)
2450        {
2451        usage();
2452        goto EXIT;
2453        }
2454    else    else
2455      {      {
2456      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
2457      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
2458      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
2459             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
2460      }      }
2461    op++;    op++;
2462    argc--;    argc--;
2463    }    }
2464    
2465    /* Get the store for the offsets vector, and remember what it was */
2466    
2467    size_offsets_max = size_offsets;
2468    offsets = (int *)malloc(size_offsets_max * sizeof(int));
2469    if (offsets == NULL)
2470      {
2471      printf("** Failed to get %d bytes of memory for offsets vector\n",
2472        (int)(size_offsets_max * sizeof(int)));
2473      yield = 1;
2474      goto EXIT;
2475      }
2476    
2477  /* Sort out the input and output files */  /* Sort out the input and output files */
2478    
2479  if (argc > 1)  if (argc > 1)
2480    {    {
2481    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
2482    if (infile == NULL)    if (infile == NULL)
2483      {      {
2484      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
2485      return 1;      yield = 1;
2486        goto EXIT;
2487      }      }
2488    }    }
2489    
2490  if (argc > 2)  if (argc > 2)
2491    {    {
2492    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
2493    if (outfile == NULL)    if (outfile == NULL)
2494      {      {
2495      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
2496      return 1;      yield = 1;
2497        goto EXIT;
2498      }      }
2499    }    }
2500    
2501  /* Set alternative malloc function */  /* Set alternative malloc function */
2502    
2503    #ifdef SUPPORT_PCRE8
2504  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2505    pcre_free = new_free;
2506    pcre_stack_malloc = stack_malloc;
2507    pcre_stack_free = stack_free;
2508    #endif
2509    
2510    #ifdef SUPPORT_PCRE16
2511    pcre16_malloc = new_malloc;
2512    pcre16_free = new_free;
2513    pcre16_stack_malloc = stack_malloc;
2514    pcre16_stack_free = stack_free;
2515    #endif
2516    
2517  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2518    
2519  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2520    
2521  /* Main loop */  /* Main loop */
2522    
# Line 366  while (!done) Line 2524  while (!done)
2524    {    {
2525    pcre *re = NULL;    pcre *re = NULL;
2526    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
2527    
2528    #if !defined NOPOSIX  /* There are still compilers that require no indent */
2529    regex_t preg;    regex_t preg;
2530      int do_posix = 0;
2531    #endif
2532    
2533    const char *error;    const char *error;
2534    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2535    unsigned const char *tables = NULL;    pcre_uint8 *p, *pp, *ppp;
2536      pcre_uint8 *to_file = NULL;
2537      const pcre_uint8 *tables = NULL;
2538      unsigned long int get_options;
2539      unsigned long int true_size, true_study_size = 0;
2540      size_t size, regex_gotten_store;
2541      int do_allcaps = 0;
2542      int do_mark = 0;
2543    int do_study = 0;    int do_study = 0;
2544      int no_force_study = 0;
2545    int do_debug = debug;    int do_debug = debug;
2546      int do_G = 0;
2547      int do_g = 0;
2548    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2549    int do_posix = 0;    int do_showrest = 0;
2550    int erroroffset, len, delimiter;    int do_showcaprest = 0;
2551      int do_flip = 0;
2552      int erroroffset, len, delimiter, poffset;
2553    
2554      use_utf = 0;
2555      debug_lengths = 1;
2556    
2557    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
2558    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2559      fflush(outfile);
2560    
2561    p = buffer;    p = buffer;
2562    while (isspace(*p)) p++;    while (isspace(*p)) p++;
2563    if (*p == 0) continue;    if (*p == 0) continue;
2564    
2565    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
2566    complete, read more. */  
2567      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2568        {
2569        pcre_uint32 magic;
2570        pcre_uint8 sbuf[8];
2571        FILE *f;
2572    
2573        p++;
2574        if (*p == '!')
2575          {
2576          do_debug = TRUE;
2577          do_showinfo = TRUE;
2578          p++;
2579          }
2580    
2581        pp = p + (int)strlen((char *)p);
2582        while (isspace(pp[-1])) pp--;
2583        *pp = 0;
2584    
2585        f = fopen((char *)p, "rb");
2586        if (f == NULL)
2587          {
2588          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2589          continue;
2590          }
2591    
2592        first_gotten_store = 0;
2593        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2594    
2595        true_size =
2596          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2597        true_study_size =
2598          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2599    
2600        re = (pcre *)new_malloc(true_size);
2601        regex_gotten_store = first_gotten_store;
2602    
2603        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2604    
2605        magic = ((REAL_PCRE *)re)->magic_number;
2606        if (magic != MAGIC_NUMBER)
2607          {
2608          if (swap_uint32(magic) == MAGIC_NUMBER)
2609            {
2610            do_flip = 1;
2611            }
2612          else
2613            {
2614            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2615            fclose(f);
2616            continue;
2617            }
2618          }
2619    
2620        /* We hide the byte-invert info for little and big endian tests. */
2621        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2622          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2623    
2624        /* Now see if there is any following study data. */
2625    
2626        if (true_study_size != 0)
2627          {
2628          pcre_study_data *psd;
2629    
2630          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2631          extra->flags = PCRE_EXTRA_STUDY_DATA;
2632    
2633          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2634          extra->study_data = psd;
2635    
2636          if (fread(psd, 1, true_study_size, f) != true_study_size)
2637            {
2638            FAIL_READ:
2639            fprintf(outfile, "Failed to read data from %s\n", p);
2640            if (extra != NULL)
2641              {
2642              PCRE_FREE_STUDY(extra);
2643              }
2644            if (re != NULL) new_free(re);
2645            fclose(f);
2646            continue;
2647            }
2648          fprintf(outfile, "Study data loaded from %s\n", p);
2649          do_study = 1;     /* To get the data output if requested */
2650          }
2651        else fprintf(outfile, "No study data\n");
2652    
2653        /* Flip the necessary bytes. */
2654        if (do_flip)
2655          {
2656          int rc;
2657          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2658          if (rc == PCRE_ERROR_BADMODE)
2659            {
2660            /* Simulate the result of the function call below. */
2661            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2662              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2663            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2664              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2665            continue;
2666            }
2667          }
2668    
2669        /* Need to know if UTF-8 for printing data strings. */
2670    
2671        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2672        use_utf = (get_options & PCRE_UTF8) != 0;
2673    
2674        fclose(f);
2675        goto SHOW_INFO;
2676        }
2677    
2678      /* In-line pattern (the usual case). Get the delimiter and seek the end of
2679      the pattern; if it isn't complete, read more. */
2680    
2681    delimiter = *p++;    delimiter = *p++;
2682    
2683    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2684      {      {
2685      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2686      goto SKIP_DATA;      goto SKIP_DATA;
2687      }      }
2688    
2689    pp = p;    pp = p;
2690      poffset = (int)(p - buffer);
2691    
2692    for(;;)    for(;;)
2693      {      {
# Line 406  while (!done) Line 2698  while (!done)
2698        pp++;        pp++;
2699        }        }
2700      if (*pp != 0) break;      if (*pp != 0) break;
2701        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
2702        {        {
2703        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2704        done = 1;        done = 1;
# Line 424  while (!done) Line 2707  while (!done)
2707      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2708      }      }
2709    
2710      /* The buffer may have moved while being extended; reset the start of data
2711      pointer to the correct relative point in the buffer. */
2712    
2713      p = buffer + poffset;
2714    
2715    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
2716    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
2717    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
2718    
2719    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
2720    
2721    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
2722      for callouts. */
2723    
2724    *pp++ = 0;    *pp++ = 0;
2725      strcpy((char *)pbuffer, (char *)p);
2726    
2727    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2728    
# Line 444  while (!done) Line 2734  while (!done)
2734      {      {
2735      switch (*pp++)      switch (*pp++)
2736        {        {
2737          case 'f': options |= PCRE_FIRSTLINE; break;
2738          case 'g': do_g = 1; break;
2739        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
2740        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
2741        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2742        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2743    
2744          case '+':
2745          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2746          break;
2747    
2748          case '=': do_allcaps = 1; break;
2749        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2750          case 'B': do_debug = 1; break;
2751          case 'C': options |= PCRE_AUTO_CALLOUT; break;
2752        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
2753        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2754          case 'F': do_flip = 1; break;
2755          case 'G': do_G = 1; break;
2756        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2757          case 'J': options |= PCRE_DUPNAMES; break;
2758          case 'K': do_mark = 1; break;
2759        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2760          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2761    
2762    #if !defined NOPOSIX
2763        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2764        case 'S': do_study = 1; break;  #endif
2765    
2766          case 'S':
2767          if (do_study == 0)
2768            {
2769            do_study = 1;
2770            if (*pp == '+')
2771              {
2772              study_options |= PCRE_STUDY_JIT_COMPILE;
2773              pp++;
2774              }
2775            }
2776          else
2777            {
2778            do_study = 0;
2779            no_force_study = 1;
2780            }
2781          break;
2782    
2783        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2784          case 'W': options |= PCRE_UCP; break;
2785        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2786          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2787          case 'Z': debug_lengths = 0; break;
2788          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2789          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2790    
2791          case 'T':
2792          switch (*pp++)
2793            {
2794            case '0': tables = tables0; break;
2795            case '1': tables = tables1; break;
2796    
2797            case '\r':
2798            case '\n':
2799            case ' ':
2800            case 0:
2801            fprintf(outfile, "** Missing table number after /T\n");
2802            goto SKIP_DATA;
2803    
2804            default:
2805            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2806            goto SKIP_DATA;
2807            }
2808          break;
2809    
2810        case 'L':        case 'L':
2811        ppp = pp;        ppp = pp;
2812        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
2813          /* The zero (NUL) test is just in case this is an unterminated line. */
2814          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2815        *ppp = 0;        *ppp = 0;
2816        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2817          {          {
2818          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2819          goto SKIP_DATA;          goto SKIP_DATA;
2820          }          }
2821        tables = pcre_maketables();        locale_set = 1;
2822          tables = PCRE_MAKETABLES;
2823        pp = ppp;        pp = ppp;
2824        break;        break;
2825    
2826        case '\n': case ' ': break;        case '>':
2827          to_file = pp;
2828          while (*pp != 0) pp++;
2829          while (isspace(pp[-1])) pp--;
2830          *pp = 0;
2831          break;
2832    
2833          case '<':
2834            {
2835            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2836              {
2837              options |= PCRE_JAVASCRIPT_COMPAT;
2838              pp += 3;
2839              }
2840            else
2841              {
2842              int x = check_newline(pp, outfile);
2843              if (x == 0) goto SKIP_DATA;
2844              options |= x;
2845              while (*pp++ != '>');
2846              }
2847            }
2848          break;
2849    
2850          case '\r':                      /* So that it works in Windows */
2851          case '\n':
2852          case ' ':
2853          break;
2854    
2855        default:        default:
2856        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2857        goto SKIP_DATA;        goto SKIP_DATA;
# Line 481  while (!done) Line 2860  while (!done)
2860    
2861    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2862    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2863    local character tables. */    local character tables. Neither does it have 16-bit support. */
2864    
2865    #if !defined NOPOSIX
2866    if (posix || do_posix)    if (posix || do_posix)
2867      {      {
2868      int rc;      int rc;
2869      int cflags = 0;      int cflags = 0;
2870    
2871      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2872      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2873        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2874        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2875        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2876        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2877        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2878    
2879        first_gotten_store = 0;
2880      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2881    
2882      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 496  while (!done) Line 2884  while (!done)
2884    
2885      if (rc != 0)      if (rc != 0)
2886        {        {
2887        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2888        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2889        goto SKIP_DATA;        goto SKIP_DATA;
2890        }        }
# Line 505  while (!done) Line 2893  while (!done)
2893    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
2894    
2895    else    else
2896    #endif  /* !defined NOPOSIX */
2897    
2898      {      {
2899      if (timeit)      /* In 16-bit mode, convert the input. */
2900    
2901    #ifdef SUPPORT_PCRE16
2902        if (use_pcre16)
2903          {
2904          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2905            {
2906            case -1:
2907            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2908              "converted to UTF-16\n");
2909            goto SKIP_DATA;
2910    
2911            case -2:
2912            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2913              "cannot be converted to UTF-16\n");
2914            goto SKIP_DATA;
2915    
2916            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2917            fprintf(outfile, "**Failed: character value greater than 0xffff "
2918              "cannot be converted to 16-bit in non-UTF mode\n");
2919            goto SKIP_DATA;
2920    
2921            default:
2922            break;
2923            }
2924          p = (pcre_uint8 *)buffer16;
2925          }
2926    #endif
2927    
2928        /* Compile many times when timing */
2929    
2930        if (timeit > 0)
2931        {        {
2932        register int i;        register int i;
2933        clock_t time_taken;        clock_t time_taken;
2934        clock_t start_time = clock();        clock_t start_time = clock();
2935        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
2936          {          {
2937          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2938          if (re != NULL) free(re);          if (re != NULL) free(re);
2939          }          }
2940        time_taken = clock() - start_time;        time_taken = clock() - start_time;
2941        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
2942          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
2943          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
2944        }        }
2945    
2946      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2947        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2948    
2949      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2950      if non-interactive. */      if non-interactive. */
# Line 535  while (!done) Line 2957  while (!done)
2957          {          {
2958          for (;;)          for (;;)
2959            {            {
2960            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2961              {              {
2962              done = 1;              done = 1;
2963              goto CONTINUE;              goto CONTINUE;
# Line 549  while (!done) Line 2971  while (!done)
2971        goto CONTINUE;        goto CONTINUE;
2972        }        }
2973    
2974      /* Compilation succeeded; print data if required */      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2975        within the regex; check for this so that we know how to process the data
2976        lines. */
2977    
2978      if (do_showinfo)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2979        {        goto SKIP_DATA;
2980        int first_char, count;      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2981    
2982        if (do_debug) print_internals(re, outfile);      /* Extract the size for possible writing before possibly flipping it,
2983        and remember the store that was got. */
2984    
2985        count = pcre_info(re, &options, &first_char);      true_size = ((REAL_PCRE *)re)->size;
2986        if (count < 0) fprintf(outfile,      regex_gotten_store = first_gotten_store;
2987          "Error %d while reading info\n", count);  
2988        else      /* Output code size information if requested */
2989    
2990        if (log_store)
2991          fprintf(outfile, "Memory allocation (code space): %d\n",
2992            (int)(first_gotten_store -
2993                  sizeof(REAL_PCRE) -
2994                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2995    
2996        /* If -s or /S was present, study the regex to generate additional info to
2997        help with the matching, unless the pattern has the SS option, which
2998        suppresses the effect of /S (used for a few test patterns where studying is
2999        never sensible). */
3000    
3001        if (do_study || (force_study >= 0 && !no_force_study))
3002          {
3003          if (timeit > 0)
3004          {          {
3005          fprintf(outfile, "Identifying subpattern count = %d\n", count);          register int i;
3006          if (options == 0) fprintf(outfile, "No options\n");          clock_t time_taken;
3007            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",          clock_t start_time = clock();
3008              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",          for (i = 0; i < timeit; i++)
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "",  
             ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
         if (first_char == -1)  
3009            {            {
3010            fprintf(outfile, "First char at start or follows \\n\n");            PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3011            }            }
3012          else if (first_char < 0)          time_taken = clock() - start_time;
3013            if (extra != NULL)
3014            {            {
3015            fprintf(outfile, "No first char\n");            PCRE_FREE_STUDY(extra);
3016            }            }
3017          else          fprintf(outfile, "  Study time %.4f milliseconds\n",
3018              (((double)time_taken * 1000.0) / (double)timeit) /
3019                (double)CLOCKS_PER_SEC);
3020            }
3021          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3022          if (error != NULL)
3023            fprintf(outfile, "Failed to study: %s\n", error);
3024          else if (extra != NULL)
3025            {
3026            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3027            if (log_store)
3028            {            {
3029            if (isprint(first_char))            size_t jitsize;
3030              fprintf(outfile, "First char = \'%c\'\n", first_char);            if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3031            else                jitsize != 0)
3032              fprintf(outfile, "First char = %d\n", first_char);              fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3033            }            }
3034          }          }
3035        }        }
3036    
3037      /* If /S was present, study the regexp to generate additional info to      /* If /K was present, we set up for handling MARK data. */
     help with the matching. */  
3038    
3039      if (do_study)      if (do_mark)
3040        {        {
3041        if (timeit)        if (extra == NULL)
3042          {          {
3043          register int i;          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3044          clock_t time_taken;          extra->flags = 0;
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
3045          }          }
3046          extra->mark = &markptr;
3047          extra->flags |= PCRE_EXTRA_MARK;
3048          }
3049    
3050        extra = pcre_study(re, study_options, &error);      /* Extract and display information from the compiled data if required. */
3051        if (error != NULL)  
3052          fprintf(outfile, "Failed to study: %s\n", error);      SHOW_INFO:
3053        else if (extra == NULL)  
3054          fprintf(outfile, "Study returned NULL\n");      if (do_debug)
3055          {
3056          fprintf(outfile, "------------------------------------------------------------------\n");
3057          PCRE_PRINTINT(re, outfile, debug_lengths);
3058          }
3059    
3060        /* We already have the options in get_options (see above) */
3061    
3062        if (do_showinfo)
3063          {
3064          unsigned long int all_options;
3065          int count, backrefmax, first_char, need_char, okpartial, jchanged,
3066            hascrorlf;
3067          int nameentrysize, namecount;
3068          const pcre_uint8 *nametable;
3069    
3070          if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3071              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3072              new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3073              new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3074              new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3075              new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3076              new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3077              new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3078              new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3079              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3080              new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3081              != 0)
3082            goto SKIP_DATA;
3083    
3084          if (size != regex_gotten_store) fprintf(outfile,
3085            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3086            (int)size, (int)regex_gotten_store);
3087    
3088          fprintf(outfile, "Capturing subpattern count = %d\n", count);
3089          if (backrefmax > 0)
3090            fprintf(outfile, "Max back reference = %d\n", backrefmax);
3091    
3092          if (namecount > 0)
3093            {
3094            fprintf(outfile, "Named capturing subpatterns:\n");
3095            while (namecount-- > 0)
3096              {
3097    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3098              int imm2_size = use_pcre16 ? 1 : 2;
3099    #else
3100              int imm2_size = IMM2_SIZE;
3101    #endif
3102              int length = (int)STRLEN(nametable + imm2_size);
3103              fprintf(outfile, "  ");
3104              PCHARSV(nametable, imm2_size, length, outfile);
3105              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3106    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3107              fprintf(outfile, "%3d\n", use_pcre16?
3108                 (int)(((PCRE_SPTR16)nametable)[0])
3109                :((int)nametable[0] << 8) | (int)nametable[1]);
3110              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3111    #else
3112              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3113    #ifdef SUPPORT_PCRE8
3114              nametable += nameentrysize;
3115    #else
3116              nametable += nameentrysize * 2;
3117    #endif
3118    #endif
3119              }
3120            }
3121    
3122          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3123          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3124    
3125          all_options = ((REAL_PCRE *)re)->options;
3126          if (do_flip) all_options = swap_uint32(all_options);
3127    
3128          if (get_options == 0) fprintf(outfile, "No options\n");
3129            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3130              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3131              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3132              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3133              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3134              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3135              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3136              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3137              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3138              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3139              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3140              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3141              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3142              ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3143              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3144              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3145              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3146              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3147    
3148          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3149    
3150          switch (get_options & PCRE_NEWLINE_BITS)
3151            {
3152            case PCRE_NEWLINE_CR:
3153            fprintf(outfile, "Forced newline sequence: CR\n");
3154            break;
3155    
3156            case PCRE_NEWLINE_LF:
3157            fprintf(outfile, "Forced newline sequence: LF\n");
3158            break;
3159    
3160            case PCRE_NEWLINE_CRLF:
3161            fprintf(outfile, "Forced newline sequence: CRLF\n");
3162            break;
3163    
3164            case PCRE_NEWLINE_ANYCRLF:
3165            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3166            break;
3167    
3168            case PCRE_NEWLINE_ANY:
3169            fprintf(outfile, "Forced newline sequence: ANY\n");
3170            break;
3171    
3172            default:
3173            break;
3174            }
3175    
3176          if (first_char == -1)
3177            {
3178            fprintf(outfile, "First char at start or follows newline\n");
3179            }
3180          else if (first_char < 0)
3181            {
3182            fprintf(outfile, "No first char\n");
3183            }
3184          else
3185            {
3186            const char *caseless =
3187              ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3188              "" : " (caseless)";
3189    
3190        /* This looks at internal information. A bit kludgy to do it this          if (PRINTOK(first_char))
3191        way, but it is useful for testing. */            fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3192            else
3193              {
3194              fprintf(outfile, "First char = ");
3195              pchar(first_char, outfile);
3196              fprintf(outfile, "%s\n", caseless);
3197              }
3198            }
3199    
3200          if (need_char < 0)
3201            {
3202            fprintf(outfile, "No need char\n");
3203            }
3204          else
3205            {
3206            const char *caseless =
3207              ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3208              "" : " (caseless)";
3209    
3210            if (PRINTOK(need_char))
3211              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3212            else
3213              {
3214              fprintf(outfile, "Need char = ");
3215              pchar(need_char, outfile);
3216              fprintf(outfile, "%s\n", caseless);
3217              }
3218            }
3219    
3220        else if (do_showinfo)        /* Don't output study size; at present it is in any case a fixed
3221          value, but it varies, depending on the computer architecture, and
3222          so messes up the test suite. (And with the /F option, it might be
3223          flipped.) If study was forced by an external -s, don't show this
3224          information unless -i or -d was also present. This means that, except
3225          when auto-callouts are involved, the output from runs with and without
3226          -s should be identical. */
3227    
3228          if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3229          {          {
3230          real_pcre_extra *xx = (real_pcre_extra *)extra;          if (extra == NULL)
3231          if ((xx->options & PCRE_STUDY_MAPPED) == 0)            fprintf(outfile, "Study returned NULL\n");
           fprintf(outfile, "No starting character set\n");  
3232          else          else
3233            {            {
3234            int i;            pcre_uint8 *start_bits = NULL;
3235            int c = 24;            int minlength;
3236            fprintf(outfile, "Starting character set: ");  
3237            for (i = 0; i < 256; i++)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3238                fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3239    
3240              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3241              {              {
3242              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (start_bits == NULL)
3243                  fprintf(outfile, "No set of starting bytes\n");
3244                else
3245                {                {
3246                if (c > 75)                int i;
3247                  {                int c = 24;
3248                  fprintf(outfile, "\n  ");                fprintf(outfile, "Starting byte set: ");
3249                  c = 2;                for (i = 0; i < 256; i++)
                 }  
               if (isprint(i) && i != ' ')  
3250                  {                  {
3251                  fprintf(outfile, "%c ", i);                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
3252                  c += 2;                    {
3253                  }                    if (c > 75)
3254                else                      {
3255                  {                      fprintf(outfile, "\n  ");
3256                  fprintf(outfile, "\\x%02x ", i);                      c = 2;
3257                  c += 5;                      }
3258                      if (PRINTOK(i) && i != ' ')
3259                        {
3260                        fprintf(outfile, "%c ", i);
3261                        c += 2;
3262                        }
3263                      else
3264                        {
3265                        fprintf(outfile, "\\x%02x ", i);
3266                        c += 5;
3267                        }
3268                      }
3269                  }                  }
3270                  fprintf(outfile, "\n");
3271                }                }
3272              }              }
3273            fprintf(outfile, "\n");            }
3274    
3275            /* Show this only if the JIT was set by /S, not by -s. */
3276    
3277            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3278              {
3279              int jit;
3280              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3281                {
3282                if (jit)
3283                  fprintf(outfile, "JIT study was successful\n");
3284                else
3285    #ifdef SUPPORT_JIT
3286                  fprintf(outfile, "JIT study was not successful\n");
3287    #else
3288                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3289    #endif
3290                }
3291            }            }
3292          }          }
3293        }        }
3294      }  
3295        /* If the '>' option was present, we write out the regex to a file, and
3296        that is all. The first 8 bytes of the file are the regex length and then
3297        the study length, in big-endian order. */
3298    
3299        if (to_file != NULL)
3300          {
3301          FILE *f = fopen((char *)to_file, "wb");
3302          if (f == NULL)
3303            {
3304            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3305            }
3306          else
3307            {
3308            pcre_uint8 sbuf[8];
3309    
3310            if (do_flip) regexflip(re, extra);
3311            sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3312            sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3313            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3314            sbuf[3] = (pcre_uint8)((true_size) & 255);
3315            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3316            sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3317            sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3318            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3319    
3320            if (fwrite(sbuf, 1, 8, f) < 8 ||
3321                fwrite(re, 1, true_size, f) < true_size)
3322              {
3323              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3324              }
3325            else
3326              {
3327              fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3328    
3329              /* If there is study data, write it. */
3330    
3331              if (extra != NULL)
3332                {
3333                if (fwrite(extra->study_data, 1, true_study_size, f) <
3334                    true_study_size)
3335                  {
3336                  fprintf(outfile, "Write error on %s: %s\n", to_file,
3337                    strerror(errno));
3338                  }
3339                else fprintf(outfile, "Study data written to %s\n", to_file);
3340                }
3341              }
3342            fclose(f);
3343            }
3344    
3345          new_free(re);
3346          if (extra != NULL)
3347            {
3348            PCRE_FREE_STUDY(extra);
3349            }
3350          if (locale_set)
3351            {
3352            new_free((void *)tables);
3353            setlocale(LC_CTYPE, "C");
3354            locale_set = 0;
3355            }
3356          continue;  /* With next regex */
3357          }
3358        }        /* End of non-POSIX compile */
3359    
3360    /* Read data lines and test them */    /* Read data lines and test them */
3361    
3362    for (;;)    for (;;)
3363      {      {
3364      unsigned char *q;      pcre_uint8 *q;
3365        pcre_uint8 *bptr;
3366        int *use_offsets = offsets;
3367        int use_size_offsets = size_offsets;
3368        int callout_data = 0;
3369        int callout_data_set = 0;
3370      int count, c;      int count, c;
3371      int copystrings = 0;      int copystrings = 0;
3372        int find_match_limit = default_find_match_limit;
3373      int getstrings = 0;      int getstrings = 0;
3374      int getlist = 0;      int getlist = 0;
3375      int offsets[45];      int gmatched = 0;
3376      int size_offsets = sizeof(offsets)/sizeof(int);      int start_offset = 0;
3377        int start_offset_sign = 1;
3378        int g_notempty = 0;
3379        int use_dfa = 0;
3380    
3381        *copynames = 0;
3382        *getnames = 0;
3383    
3384    #ifdef SUPPORT_PCRE16
3385        cn16ptr = copynames;
3386        gn16ptr = getnames;
3387    #endif
3388    #ifdef SUPPORT_PCRE8
3389        cn8ptr = copynames8;
3390        gn8ptr = getnames8;
3391    #endif
3392    
3393        SET_PCRE_CALLOUT(callout);
3394        first_callout = 1;
3395        last_callout_mark = NULL;
3396        callout_extra = 0;
3397        callout_count = 0;
3398        callout_fail_count = 999999;
3399        callout_fail_id = -1;
3400        show_malloc = 0;
3401      options = 0;      options = 0;
3402    
3403      if (infile == stdin) printf("  data> ");      if (extra != NULL) extra->flags &=
3404      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3405    
3406        len = 0;
3407        for (;;)
3408        {        {
3409        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3410        goto CONTINUE;          {
3411            if (len > 0)    /* Reached EOF without hitting a newline */
3412              {
3413              fprintf(outfile, "\n");
3414              break;
3415              }
3416            done = 1;
3417            goto CONTINUE;
3418            }
3419          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3420          len = (int)strlen((char *)buffer);
3421          if (buffer[len-1] == '\n') break;
3422        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
3423    
     len = (int)strlen((char *)buffer);  
3424      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
3425      buffer[len] = 0;      buffer[len] = 0;
3426      if (len == 0) break;      if (len == 0) break;
# Line 686  while (!done) Line 3428  while (!done)
3428      p = buffer;      p = buffer;
3429      while (isspace(*p)) p++;      while (isspace(*p)) p++;
3430    
3431      q = dbuffer;      bptr = q = dbuffer;
3432      while ((c = *p++) != 0)      while ((c = *p++) != 0)
3433        {        {
3434        int i = 0;        int i = 0;
3435        int n = 0;        int n = 0;
3436        if (c == '\\') switch ((c = *p++))  
3437          /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3438          In non-UTF mode, allow the value of the byte to fall through to later,
3439          where values greater than 127 are turned into UTF-8 when running in
3440          16-bit mode. */
3441    
3442          if (c != '\\')
3443            {
3444            if (use_utf)
3445              {
3446              *q++ = c;
3447              continue;
3448              }
3449            }
3450    
3451          /* Handle backslash escapes */
3452    
3453          else switch ((c = *p++))
3454          {          {
3455          case 'a': c =    7; break;          case 'a': c =    7; break;
3456          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 709  while (!done) Line 3468  while (!done)
3468            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
3469          break;          break;
3470    
3471          case 'x':          case 'x':
3472          c = 0;          if (*p == '{')
3473          while (i++ < 2 && isxdigit(*p))            {
3474              pcre_uint8 *pt = p;
3475              c = 0;
3476    
3477              /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3478              when isxdigit() is a macro that refers to its argument more than
3479              once. This is banned by the C Standard, but apparently happens in at
3480              least one MacOS environment. */
3481    
3482              for (pt++; isxdigit(*pt); pt++)
3483                {
3484                if (++i == 9)
3485                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3486                                   "using only the first eight.\n");
3487                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3488                }
3489              if (*pt == '}')
3490                {
3491                p = pt + 1;
3492                break;
3493                }
3494              /* Not correct form for \x{...}; fall through */
3495              }
3496    
3497            /* \x without {} always defines just one byte in 8-bit mode. This
3498            allows UTF-8 characters to be constructed byte by byte, and also allows
3499            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3500            Otherwise, pass it down to later code so that it can be turned into
3501            UTF-8 when running in 16-bit mode. */
3502    
3503            c = 0;