/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 839 by zherczeg, Fri Dec 30 13:22:28 2011 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50    #ifdef HAVE_CONFIG_H
51    #include "config.h"
52    #endif
53    
54  #include <ctype.h>  #include <ctype.h>
55  #include <stdio.h>  #include <stdio.h>
56  #include <string.h>  #include <string.h>
57  #include <stdlib.h>  #include <stdlib.h>
58  #include <time.h>  #include <time.h>
59  #include <locale.h>  #include <locale.h>
60    #include <errno.h>
61    
62    #ifdef SUPPORT_LIBREADLINE
63    #ifdef HAVE_UNISTD_H
64    #include <unistd.h>
65    #endif
66    #include <readline/readline.h>
67    #include <readline/history.h>
68    #endif
69    
70    
71    /* A number of things vary for Windows builds. Originally, pcretest opened its
72    input and output without "b"; then I was told that "b" was needed in some
73    environments, so it was added for release 5.0 to both the input and output. (It
74    makes no difference on Unix-like systems.) Later I was told that it is wrong
75    for the input on Windows. I've now abstracted the modes into two macros that
76    are set here, to make it easier to fiddle with them, and removed "b" from the
77    input mode under Windows. */
78    
79    #if defined(_WIN32) || defined(WIN32)
80    #include <io.h>                /* For _setmode() */
81    #include <fcntl.h>             /* For _O_BINARY */
82    #define INPUT_MODE   "r"
83    #define OUTPUT_MODE  "wb"
84    
85    #ifndef isatty
86    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
87    #endif                         /* though in some environments they seem to   */
88                                   /* be already defined, hence the #ifndefs.    */
89    #ifndef fileno
90    #define fileno _fileno
91    #endif
92    
93    /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95    #ifdef __BORLANDC__
96    #define _setmode(handle, mode) setmode(handle, mode)
97    #endif
98    
99    /* Not Windows */
100    
101    #else
102    #include <sys/time.h>          /* These two includes are needed */
103    #include <sys/resource.h>      /* for setrlimit(). */
104    #define INPUT_MODE   "rb"
105    #define OUTPUT_MODE  "wb"
106    #endif
107    
108    
109    /* We have to include pcre_internal.h because we need the internal info for
110    displaying the results of pcre_study() and we also need to know about the
111    internal macros, structures, and other internal data values; pcretest has
112    "inside information" compared to a program that strictly follows the PCRE API.
113    
114    Although pcre_internal.h does itself include pcre.h, we explicitly include it
115    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116    appropriately for an application, not for building PCRE. */
117    
118    #include "pcre.h"
119    
120    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121    /* Configure internal macros to 16 bit mode. */
122    #define COMPILE_PCRE16
123    #endif
124    
125    #include "pcre_internal.h"
126    
127    /* The pcre_printint() function, which prints the internal form of a compiled
128    regex, is held in a separate file so that (a) it can be compiled in either
129    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130    when that is compiled in debug mode. */
131    
132    #ifdef SUPPORT_PCRE8
133    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134    #endif
135    #ifdef SUPPORT_PCRE16
136    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137    #endif
138    
139    /* We need access to some of the data tables that PCRE uses. So as not to have
140    to keep two copies, we include the source file here, changing the names of the
141    external symbols to prevent clashes. */
142    
143    #define PCRE_INCLUDED
144    #undef PRIV
145    #define PRIV(name) name
146    
147    #include "pcre_tables.c"
148    
149    /* The definition of the macro PRINTABLE, which determines whether to print an
150    output character as-is or as a hex value when showing compiled patterns, is
151    the same as in the printint.src file. We use it here in cases when the locale
152    has not been explicitly changed, so as to get consistent output from systems
153    that differ in their output from isprint() even in the "C" locale. */
154    
155    #ifdef EBCDIC
156    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157    #else
158    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
159    #endif
160    
161    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162    
163    /* Posix support is disabled in 16 bit only mode. */
164    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165    #define NOPOSIX
166    #endif
167    
168    /* It is possible to compile this test program without including support for
169    testing the POSIX interface, though this is not available via the standard
170    Makefile. */
171    
172    #if !defined NOPOSIX
173    #include "pcreposix.h"
174    #endif
175    
176    /* It is also possible, originally for the benefit of a version that was
177    imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178    NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
179    automatically cut out the UTF support if PCRE is built without it. */
180    
181    #ifndef SUPPORT_UTF
182    #ifndef NOUTF
183    #define NOUTF
184    #endif
185    #endif
186    
187    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189    only from one place and is handled differently). I couldn't dream up any way of
190    using a single macro to do this in a generic way, because of the many different
191    argument requirements. We know that at least one of SUPPORT_PCRE8 and
192    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193    use these in the definitions of generic macros.
194    
195    **** Special note about the PCHARSxxx macros: the address of the string to be
196    printed is always given as two arguments: a base address followed by an offset.
197    The base address is cast to the correct data size for 8 or 16 bit data; the
198    offset is in units of this size. If the string were given as base+offset in one
199    argument, the casting might be incorrectly applied. */
200    
201    #ifdef SUPPORT_PCRE8
202    
203    #define PCHARS8(lv, p, offset, len, f) \
204      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205    
206    #define PCHARSV8(p, offset, len, f) \
207      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208    
209    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210      p = read_capture_name8(p, cn8, re)
211    
212    #define SET_PCRE_CALLOUT8(callout) \
213      pcre_callout = callout
214    
215    #define STRLEN8(p) ((int)strlen((char *)p))
216    
217    
218    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
219      re = pcre_compile((char *)pat, options, error, erroffset, tables)
220    
221    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
222        namesptr, cbuffer, size) \
223      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
224        (char *)namesptr, cbuffer, size)
225    
226    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
227      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
228    
229    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
230        offsets, size_offsets, workspace, size_workspace) \
231      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
232        offsets, size_offsets, workspace, size_workspace)
233    
234    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
235        offsets, size_offsets) \
236      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
237        offsets, size_offsets)
238    
239    #define PCRE_FREE_STUDY8(extra) \
240      pcre_free_study(extra)
241    
242    #define PCRE_FREE_SUBSTRING8(substring) \
243      pcre_free_substring(substring)
244    
245    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
246      pcre_free_substring_list(listptr)
247    
248    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
249        getnamesptr, subsptr) \
250      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
251        (char *)getnamesptr, subsptr)
252    
253    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
254      n = pcre_get_stringnumber(re, (char *)ptr)
       /* NOTE(review): the expansion above never uses its `rc` parameter; the
       pattern passed to pcre_get_stringnumber() is whatever identifier named `re`
       is in scope at the call site. Confirm that every caller really has the
       intended compiled pattern in a variable called `re`. */
255    
256    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
257      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
258    
259    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
260      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
261    
262    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
263      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
264    
265    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
266      pcre_printint(re, outfile, debug_lengths)
267    
268    #define PCRE_STUDY8(extra, re, options, error) \
269      extra = pcre_study(re, options, error)
270    
271    #endif /* SUPPORT_PCRE8 */
272    
273    /* -----------------------------------------------------------*/
274    
275    #ifdef SUPPORT_PCRE16
276    
277    #define PCHARS16(lv, p, offset, len, f) \
278      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
279    
280    #define PCHARSV16(p, offset, len, f) \
281      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
282    
283    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
284      p = read_capture_name16(p, cn16, re)
285    
286    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
287    
288    #define SET_PCRE_CALLOUT16(callout) \
289      pcre16_callout = callout
290    
291    
292    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
293      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
294    
295    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
296        namesptr, cbuffer, size) \
297      rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
298        (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, size/2)
299    
300    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
301      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
302        (PCRE_SCHAR16 *)cbuffer, size/2)
303    
304    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
305        offsets, size_offsets, workspace, size_workspace) \
306      count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
307        options, offsets, size_offsets, workspace, size_workspace)
308    
309    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
310        offsets, size_offsets) \
311      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
312        options, offsets, size_offsets)
313    
314    #define PCRE_FREE_STUDY16(extra) \
315      pcre16_free_study(extra)
316    
317    #define PCRE_FREE_SUBSTRING16(substring) \
318      pcre16_free_substring((PCRE_SPTR16)substring)
319    
320    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
321      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
322    
323    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
324        getnamesptr, subsptr) \
325      rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
326        (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
327    
328    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
329      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
       /* NOTE(review): the expansion above never uses its `rc` parameter; the
       pattern passed to pcre16_get_stringnumber() is whatever identifier named
       `re` is in scope at the call site. Confirm that every caller really has the
       intended compiled pattern in a variable called `re`. */
330    
331    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
332      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
333        (PCRE_SPTR16 *)(void*)subsptr)
334    
335    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
336      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
337        (PCRE_SPTR16 **)(void*)listptr)
338    
339    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
340      rc = pcre16_pattern_to_host_byte_order(re, extra, tables)
341    
342    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
343      pcre16_printint(re, outfile, debug_lengths)
344    
345    #define PCRE_STUDY16(extra, re, options, error) \
346      extra = pcre16_study(re, options, error)
347    
348    #endif /* SUPPORT_PCRE16 */
349    
350    
351    /* ----- Both modes are supported; a runtime test is needed, except for
352    pcre_config() and the JIT stack functions, for which it doesn't matter which
353    version is called. ----- */
354    
355    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
356    
357    #define CHAR_SIZE (use_pcre16? 2:1)
358    
359    #define PCHARS(lv, p, offset, len, f) \
360      if (use_pcre16) \
361        PCHARS16(lv, p, offset, len, f); \
362      else \
363        PCHARS8(lv, p, offset, len, f)
       /* NOTE(review): when both modes are built, this expands to a bare if/else
       statement that tests use_pcre16 at run time, not to an expression or a
       do { } while (0) block. It must therefore be used as a complete statement
       followed by a semicolon, and it needs braces around it when it is the body
       of another if that has its own else. The other statement-style macros in
       this both-modes section have the same shape. */
364    
365    #define PCHARSV(p, offset, len, f) \
366      if (use_pcre16) \
367        PCHARSV16(p, offset, len, f); \
368      else \
369        PCHARSV8(p, offset, len, f)
370    
371    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
372      if (use_pcre16) \
373        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
374      else \
375        READ_CAPTURE_NAME8(p, cn8, cn16, re)
376    
377    #define SET_PCRE_CALLOUT(callout) \
378      if (use_pcre16) \
379        SET_PCRE_CALLOUT16(callout); \
380      else \
381        SET_PCRE_CALLOUT8(callout)
382    
383    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
384    
385    #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
386    
387    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
388      if (use_pcre16) \
389        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
390      else \
391        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
392    
393    #define PCRE_CONFIG pcre_config
394    
395    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
396        namesptr, cbuffer, size) \
397      if (use_pcre16) \
398        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
399          namesptr, cbuffer, size); \
400      else \
401        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
402          namesptr, cbuffer, size)
403    
404    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
405      if (use_pcre16) \
406        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
407      else \
408        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
409    
410    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
411        offsets, size_offsets, workspace, size_workspace) \
412      if (use_pcre16) \
413        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
414          offsets, size_offsets, workspace, size_workspace); \
415      else \
416        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
417          offsets, size_offsets, workspace, size_workspace)
418    
419    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
420        offsets, size_offsets) \
421      if (use_pcre16) \
422        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
423          offsets, size_offsets); \
424      else \
425        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
426          offsets, size_offsets)
427    
428    #define PCRE_FREE_STUDY(extra) \
429      if (use_pcre16) \
430        PCRE_FREE_STUDY16(extra); \
431      else \
432        PCRE_FREE_STUDY8(extra)
433    
434    #define PCRE_FREE_SUBSTRING(substring) \
435      if (use_pcre16) \
436        PCRE_FREE_SUBSTRING16(substring); \
437      else \
438        PCRE_FREE_SUBSTRING8(substring)
439    
440    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
441      if (use_pcre16) \
442        PCRE_FREE_SUBSTRING_LIST16(listptr); \
443      else \
444        PCRE_FREE_SUBSTRING_LIST8(listptr)
445    
446    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
447        getnamesptr, subsptr) \
448      if (use_pcre16) \
449        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
450          getnamesptr, subsptr); \
451      else \
452        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
453          getnamesptr, subsptr)
454    
455    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
456      if (use_pcre16) \
457        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
458      else \
459        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
460    
461    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
462      if (use_pcre16) \
463        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
464      else \
465        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
466    
467    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
468      if (use_pcre16) \
469        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
470      else \
471        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
472    
473    #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
474    #define PCRE_JIT_STACK_FREE pcre_jit_stack_free
475    
476    #define PCRE_MAKETABLES \
477      (use_pcre16? pcre16_maketables() : pcre_maketables())
478    
479    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
480      if (use_pcre16) \
481        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
482      else \
483        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
484    
485    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
486      if (use_pcre16) \
487        PCRE_PRINTINT16(re, outfile, debug_lengths); \
488      else \
489        PCRE_PRINTINT8(re, outfile, debug_lengths)
490    
491    #define PCRE_STUDY(extra, re, options, error) \
492      if (use_pcre16) \
493        PCRE_STUDY16(extra, re, options, error); \
494      else \
495        PCRE_STUDY8(extra, re, options, error)
496    
497    /* ----- Only 8-bit mode is supported ----- */
498    
499    #elif defined SUPPORT_PCRE8
500    #define CHAR_SIZE                 1
501    #define PCHARS                    PCHARS8
502    #define PCHARSV                   PCHARSV8
503    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
504    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
505    #define STRLEN                    STRLEN8
506    #define PCRE_ASSIGN_JIT_STACK     pcre_assign_jit_stack
507    #define PCRE_COMPILE              PCRE_COMPILE8
508    #define PCRE_CONFIG               pcre_config
509    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
510    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
511    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
512    #define PCRE_EXEC                 PCRE_EXEC8
513    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
514    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
515    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
516    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
517    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
518    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
519    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
520    #define PCRE_JIT_STACK_ALLOC      pcre_jit_stack_alloc
521    #define PCRE_JIT_STACK_FREE       pcre_jit_stack_free
522    #define PCRE_MAKETABLES           pcre_maketables()
523    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
524    #define PCRE_PRINTINT             PCRE_PRINTINT8
525    #define PCRE_STUDY                PCRE_STUDY8
526    
527    /* ----- Only 16-bit mode is supported ----- */
528    
529    #else
530    #define CHAR_SIZE                 2
531    #define PCHARS                    PCHARS16
532    #define PCHARSV                   PCHARSV16
533    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
534    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
535    #define STRLEN                    STRLEN16
536    #define PCRE_ASSIGN_JIT_STACK     pcre16_assign_jit_stack
537    #define PCRE_COMPILE              PCRE_COMPILE16
538    #define PCRE_CONFIG               pcre16_config
539    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
540    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
541    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
542    #define PCRE_EXEC                 PCRE_EXEC16
543    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
544    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
545    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
546    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
547    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
548    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
549    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
550    #define PCRE_JIT_STACK_ALLOC      pcre16_jit_stack_alloc
551    #define PCRE_JIT_STACK_FREE       pcre16_jit_stack_free
552    #define PCRE_MAKETABLES           pcre16_maketables()
553    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
554    #define PCRE_PRINTINT             PCRE_PRINTINT16
555    #define PCRE_STUDY                PCRE_STUDY16
556    #endif
557    
558    /* ----- End of mode-specific function call macros ----- */
559    
560    
561    /* Other parameters */
562    
563    #ifndef CLOCKS_PER_SEC         /* Fallback for systems whose headers do not */
564    #ifdef CLK_TCK                 /* define it: use CLK_TCK when that exists,  */
565    #define CLOCKS_PER_SEC CLK_TCK /* otherwise assume 100 ticks per second.    */
566    #else
567    #define CLOCKS_PER_SEC 100
568    #endif
569    #endif
570    
571    /* This is the default loop count for timing. */
572    
573    #define LOOPREPEAT 500000
574    
575    /* Static variables */
576    
577    static FILE *outfile;
578    static int log_store = 0;
579    static int callout_count;
580    static int callout_extra;
581    static int callout_fail_count;
582    static int callout_fail_id;
583    static int debug_lengths;
584    static int first_callout;
585    static int locale_set = 0;     /* Non-zero makes PRINTOK() use isprint() */
586    static int show_malloc;
587    static int use_utf;
588    static size_t gotten_store;
589    static size_t first_gotten_store = 0;
590    static const unsigned char *last_callout_mark = NULL;
591    
592    /* The buffers grow automatically if very long input lines are encountered. */
593    
594    static int buffer_size = 50000;
595    static pcre_uint8 *buffer = NULL;
596    static pcre_uint8 *dbuffer = NULL;
597    static pcre_uint8 *pbuffer = NULL;
598    
599    /* Another buffer is needed for translation to 16-bit character strings. It
600    will be obtained and extended as required. */
601    
602    #ifdef SUPPORT_PCRE16
603    static int buffer16_size = 0;
604    static pcre_uint16 *buffer16 = NULL;
605    
606    #ifdef SUPPORT_PCRE8
607    
608    /* We need the table of operator lengths that is used for 16-bit compiling, in
609    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
610    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
611    appropriately for the 16-bit world. Just as a safety check, make sure that
612    COMPILE_PCRE16 is *not* set. */
613    
614    #ifdef COMPILE_PCRE16
615    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
616    #endif
617    
618    #if LINK_SIZE == 2
619    #undef LINK_SIZE
620    #define LINK_SIZE 1
621    #elif LINK_SIZE == 3 || LINK_SIZE == 4
622    #undef LINK_SIZE
623    #define LINK_SIZE 2
624    #else
625    #error LINK_SIZE must be either 2, 3, or 4
626    #endif
627    
628    #undef IMM2_SIZE
629    #define IMM2_SIZE 1
630    
631    #endif /* SUPPORT_PCRE8 */
632    
633    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
634    #endif  /* SUPPORT_PCRE16 */
635    
636    /* If we have 8-bit support, default use_pcre16 to false; if there is also
637    16-bit support, it can be changed by an option. If there is no 8-bit support,
638    there must be 16-bit support, so default it to 1. */
639    
640    #ifdef SUPPORT_PCRE8
641    static int use_pcre16 = 0;
642    #else
643    static int use_pcre16 = 1;
644    #endif
645    
646    /* Textual explanations for runtime error codes */
647    
648    static const char *errtexts[] = {
         /* A NULL entry means there is no generic message for that code; the
         comment beside each NULL says whether the code is handled specially or is
         never returned. NOTE(review): the table is presumably indexed by the
         negated PCRE error code -- confirm against the code that reads it. */
649      NULL,  /* 0 is no error */
650      NULL,  /* NOMATCH is handled specially */
651      "NULL argument passed",
652      "bad option value",
653      "magic number missing",
654      "unknown opcode - pattern overwritten?",
655      "no more memory",
656      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
657      "match limit exceeded",
658      "callout error code",
659      NULL,  /* BADUTF8/16 is handled specially */
660      NULL,  /* BADUTF8/16 offset is handled specially */
661      NULL,  /* PARTIAL is handled specially */
662      "not used - internal error",
663      "internal error - pattern overwritten?",
664      "bad count value",
665      "item unsupported for DFA matching",
666      "backreference condition or recursion test not supported for DFA matching",
667      "match limit not supported for DFA matching",
668      "workspace size exceeded in DFA matching",
669      "too much recursion for DFA matching",
670      "recursion limit exceeded",
671      "not used - internal error",
672      "invalid combination of newline options",
673      "bad offset value",
674      NULL,  /* SHORTUTF8/16 is handled specially */
675      "nested recursion at the same subject position",
676      "JIT stack limit reached",
677      "pattern compiled in wrong mode: 8-bit/16-bit error"
678    };
679    
680    
681    /*************************************************
682    *         Alternate character tables             *
683    *************************************************/
684    
685    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
686    using the default tables of the library. However, the T option can be used to
687    select alternate sets of tables, for different kinds of testing. Note also that
688    the L (locale) option also adjusts the tables. */
689    
690    /* This is the set of tables distributed as default with PCRE. It recognizes
691    only ASCII characters. */
692    
/* tables0 is one flat byte array holding four consecutive sections: a
256-entry lower casing table, a 256-entry case flipping table, ten 32-byte
character class bit maps, and a 256-entry character type table. */
693    static const pcre_uint8 tables0[] = {
694    
695    /* This table is a lower casing table: entries 65-90 ('A'-'Z') hold 97-122
('a'-'z'); every other entry holds its own index. */
696    
697        0,  1,  2,  3,  4,  5,  6,  7,
698        8,  9, 10, 11, 12, 13, 14, 15,
699       16, 17, 18, 19, 20, 21, 22, 23,
700       24, 25, 26, 27, 28, 29, 30, 31,
701       32, 33, 34, 35, 36, 37, 38, 39,
702       40, 41, 42, 43, 44, 45, 46, 47,
703       48, 49, 50, 51, 52, 53, 54, 55,
704       56, 57, 58, 59, 60, 61, 62, 63,
705       64, 97, 98, 99,100,101,102,103,
706      104,105,106,107,108,109,110,111,
707      112,113,114,115,116,117,118,119,
708      120,121,122, 91, 92, 93, 94, 95,
709       96, 97, 98, 99,100,101,102,103,
710      104,105,106,107,108,109,110,111,
711      112,113,114,115,116,117,118,119,
712      120,121,122,123,124,125,126,127,
713      128,129,130,131,132,133,134,135,
714      136,137,138,139,140,141,142,143,
715      144,145,146,147,148,149,150,151,
716      152,153,154,155,156,157,158,159,
717      160,161,162,163,164,165,166,167,
718      168,169,170,171,172,173,174,175,
719      176,177,178,179,180,181,182,183,
720      184,185,186,187,188,189,190,191,
721      192,193,194,195,196,197,198,199,
722      200,201,202,203,204,205,206,207,
723      208,209,210,211,212,213,214,215,
724      216,217,218,219,220,221,222,223,
725      224,225,226,227,228,229,230,231,
726      232,233,234,235,236,237,238,239,
727      240,241,242,243,244,245,246,247,
728      248,249,250,251,252,253,254,255,
729    
730    /* This table is a case flipping table: 'A'-'Z' and 'a'-'z' are exchanged;
every other entry holds its own index. */
731    
732        0,  1,  2,  3,  4,  5,  6,  7,
733        8,  9, 10, 11, 12, 13, 14, 15,
734       16, 17, 18, 19, 20, 21, 22, 23,
735       24, 25, 26, 27, 28, 29, 30, 31,
736       32, 33, 34, 35, 36, 37, 38, 39,
737       40, 41, 42, 43, 44, 45, 46, 47,
738       48, 49, 50, 51, 52, 53, 54, 55,
739       56, 57, 58, 59, 60, 61, 62, 63,
740       64, 97, 98, 99,100,101,102,103,
741      104,105,106,107,108,109,110,111,
742      112,113,114,115,116,117,118,119,
743      120,121,122, 91, 92, 93, 94, 95,
744       96, 65, 66, 67, 68, 69, 70, 71,
745       72, 73, 74, 75, 76, 77, 78, 79,
746       80, 81, 82, 83, 84, 85, 86, 87,
747       88, 89, 90,123,124,125,126,127,
748      128,129,130,131,132,133,134,135,
749      136,137,138,139,140,141,142,143,
750      144,145,146,147,148,149,150,151,
751      152,153,154,155,156,157,158,159,
752      160,161,162,163,164,165,166,167,
753      168,169,170,171,172,173,174,175,
754      176,177,178,179,180,181,182,183,
755      184,185,186,187,188,189,190,191,
756      192,193,194,195,196,197,198,199,
757      200,201,202,203,204,205,206,207,
758      208,209,210,211,212,213,214,215,
759      216,217,218,219,220,221,222,223,
760      224,225,226,227,228,229,230,231,
761      232,233,234,235,236,237,238,239,
762      240,241,242,243,244,245,246,247,
763      248,249,250,251,252,253,254,255,
764    
765    /* This table contains bit maps for various character classes. Each map is 32
766    bytes long and the bits run from the least significant end of each byte. The
767    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
768    graph, print, punct, and cntrl. Other classes are built from combinations.
The maps appear below in that order; character n is bit (n & 7) of byte
(n >> 3) within its map. */
769    
  /* space: characters 9-13 and 32 */
770      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
771      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
772      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
773      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
774    
  /* xdigit: '0'-'9', 'A'-'F', 'a'-'f' */
775      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
776      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
777      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
778      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779    
  /* digit: '0'-'9' */
780      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
781      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
782      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
783      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
784    
  /* upper: 'A'-'Z' */
785      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
786      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
787      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
788      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
789    
  /* lower: 'a'-'z' */
790      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
791      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
792      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
793      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
794    
  /* word: digits, letters and '_' */
795      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
796      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
797      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
798      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
799    
  /* graph: characters 33-126 */
800      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
801      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
802      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804    
  /* print: characters 32-126 */
805      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
806      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
807      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
808      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809    
  /* punct: characters 33-126 that are neither letters nor digits */
810      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
811      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
812      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814    
  /* cntrl: characters 0-31 and 127 */
815      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
816      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
817      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819    
820    /* This table identifies various classes of character by individual bits:
821      0x01   white space character
822      0x02   letter
823      0x04   decimal digit
824      0x08   hexadecimal digit
825      0x10   alphanumeric or '_'
826      0x80   regular expression metacharacter or binary zero
There is one entry per character value; no entry for 128-255 has any bit set.
827    */
828    
829      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
830      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
831      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
832      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
833      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
834      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
835      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
836      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
837      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
838      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
839      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
840      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
841      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
842      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
843      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
844      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
845      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
846      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
847      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
850      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
851      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
852      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
854      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
855      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
856      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
857      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
858      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
859      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
860      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
861    
862    /* This is a set of tables that came originally from a Windows user. It seems to
863    be at least an approximation of ISO 8859. In particular, there are characters
864    greater than 128 that are marked as spaces, letters, etc. The array has the
same number of entries, in the same four sections, as tables0: 256 lower casing
entries, 256 case flipping entries, 320 bytes of class bit maps, and 256
character type entries. */
865    
866    static const pcre_uint8 tables1[] = {
/* Lower casing table. Besides 'A'-'Z', entries 192-222 hold 224-254, except
that 215 holds itself. */
867    0,1,2,3,4,5,6,7,
868    8,9,10,11,12,13,14,15,
869    16,17,18,19,20,21,22,23,
870    24,25,26,27,28,29,30,31,
871    32,33,34,35,36,37,38,39,
872    40,41,42,43,44,45,46,47,
873    48,49,50,51,52,53,54,55,
874    56,57,58,59,60,61,62,63,
875    64,97,98,99,100,101,102,103,
876    104,105,106,107,108,109,110,111,
877    112,113,114,115,116,117,118,119,
878    120,121,122,91,92,93,94,95,
879    96,97,98,99,100,101,102,103,
880    104,105,106,107,108,109,110,111,
881    112,113,114,115,116,117,118,119,
882    120,121,122,123,124,125,126,127,
883    128,129,130,131,132,133,134,135,
884    136,137,138,139,140,141,142,143,
885    144,145,146,147,148,149,150,151,
886    152,153,154,155,156,157,158,159,
887    160,161,162,163,164,165,166,167,
888    168,169,170,171,172,173,174,175,
889    176,177,178,179,180,181,182,183,
890    184,185,186,187,188,189,190,191,
891    224,225,226,227,228,229,230,231,
892    232,233,234,235,236,237,238,239,
893    240,241,242,243,244,245,246,215,
894    248,249,250,251,252,253,254,223,
895    224,225,226,227,228,229,230,231,
896    232,233,234,235,236,237,238,239,
897    240,241,242,243,244,245,246,247,
898    248,249,250,251,252,253,254,255,
/* Case flipping table. As well as the ASCII letters, 192-222 are exchanged
with 224-254; 215, 223, 247 and 255 hold themselves. */
899    0,1,2,3,4,5,6,7,
900    8,9,10,11,12,13,14,15,
901    16,17,18,19,20,21,22,23,
902    24,25,26,27,28,29,30,31,
903    32,33,34,35,36,37,38,39,
904    40,41,42,43,44,45,46,47,
905    48,49,50,51,52,53,54,55,
906    56,57,58,59,60,61,62,63,
907    64,97,98,99,100,101,102,103,
908    104,105,106,107,108,109,110,111,
909    112,113,114,115,116,117,118,119,
910    120,121,122,91,92,93,94,95,
911    96,65,66,67,68,69,70,71,
912    72,73,74,75,76,77,78,79,
913    80,81,82,83,84,85,86,87,
914    88,89,90,123,124,125,126,127,
915    128,129,130,131,132,133,134,135,
916    136,137,138,139,140,141,142,143,
917    144,145,146,147,148,149,150,151,
918    152,153,154,155,156,157,158,159,
919    160,161,162,163,164,165,166,167,
920    168,169,170,171,172,173,174,175,
921    176,177,178,179,180,181,182,183,
922    184,185,186,187,188,189,190,191,
923    224,225,226,227,228,229,230,231,
924    232,233,234,235,236,237,238,239,
925    240,241,242,243,244,245,246,215,
926    248,249,250,251,252,253,254,223,
927    192,193,194,195,196,197,198,199,
928    200,201,202,203,204,205,206,207,
929    208,209,210,211,212,213,214,247,
930    216,217,218,219,220,221,222,255,
/* Character class bit maps: 320 bytes, written in decimal. Presumably ten
32-byte maps in the same class order as tables0 (four source lines per map);
unlike tables0, some maps have bits set for characters above 127. */
931    0,62,0,0,1,0,0,0,
932    0,0,0,0,0,0,0,0,
933    32,0,0,0,1,0,0,0,
934    0,0,0,0,0,0,0,0,
935    0,0,0,0,0,0,255,3,
936    126,0,0,0,126,0,0,0,
937    0,0,0,0,0,0,0,0,
938    0,0,0,0,0,0,0,0,
939    0,0,0,0,0,0,255,3,
940    0,0,0,0,0,0,0,0,
941    0,0,0,0,0,0,12,2,
942    0,0,0,0,0,0,0,0,
943    0,0,0,0,0,0,0,0,
944    254,255,255,7,0,0,0,0,
945    0,0,0,0,0,0,0,0,
946    255,255,127,127,0,0,0,0,
947    0,0,0,0,0,0,0,0,
948    0,0,0,0,254,255,255,7,
949    0,0,0,0,0,4,32,4,
950    0,0,0,128,255,255,127,255,
951    0,0,0,0,0,0,255,3,
952    254,255,255,135,254,255,255,7,
953    0,0,0,0,0,4,44,6,
954    255,255,127,255,255,255,127,255,
955    0,0,0,0,254,255,255,255,
956    255,255,255,255,255,255,255,127,
957    0,0,0,0,254,255,255,255,
958    255,255,255,255,255,255,255,255,
959    0,2,0,0,255,255,255,255,
960    255,255,255,255,255,255,255,127,
961    0,0,0,0,255,255,255,255,
962    255,255,255,255,255,255,255,255,
963    0,0,0,0,254,255,0,252,
964    1,0,0,248,1,0,0,120,
965    0,0,0,0,254,255,255,255,
966    0,0,128,0,0,0,128,0,
967    255,255,255,255,0,0,0,0,
968    0,0,0,0,0,0,0,128,
969    255,255,255,255,0,0,0,0,
970    0,0,0,0,0,0,0,0,
/* Character type table, one entry per character value, written in decimal.
The ASCII half carries the same values as the tables0 type table (128 = 0x80,
28 = 0x1c, 26 = 0x1a, 18 = 0x12, 16 = 0x10); the upper half additionally marks
some characters above 127. */
971    128,0,0,0,0,0,0,0,
972    0,1,1,0,1,1,0,0,
973    0,0,0,0,0,0,0,0,
974    0,0,0,0,0,0,0,0,
975    1,0,0,0,128,0,0,0,
976    128,128,128,128,0,0,128,0,
977    28,28,28,28,28,28,28,28,
978    28,28,0,0,0,0,0,128,
979    0,26,26,26,26,26,26,18,
980    18,18,18,18,18,18,18,18,
981    18,18,18,18,18,18,18,18,
982    18,18,18,128,128,0,128,16,
983    0,26,26,26,26,26,26,18,
984    18,18,18,18,18,18,18,18,
985    18,18,18,18,18,18,18,18,
986    18,18,18,128,128,0,0,0,
987    0,0,0,0,0,1,0,0,
988    0,0,0,0,0,0,0,0,
989    0,0,0,0,0,0,0,0,
990    0,0,0,0,0,0,0,0,
991    1,0,0,0,0,0,0,0,
992    0,0,18,0,0,0,0,0,
993    0,0,20,20,0,18,0,0,
994    0,20,18,0,0,0,0,0,
995    18,18,18,18,18,18,18,18,
996    18,18,18,18,18,18,18,18,
997    18,18,18,18,18,18,18,0,
998    18,18,18,18,18,18,18,18,
999    18,18,18,18,18,18,18,18,
1000    18,18,18,18,18,18,18,18,
1001    18,18,18,18,18,18,18,0,
1002    18,18,18,18,18,18,18,18
1003    };
1004    
1005    
1006    
1007    
#ifndef HAVE_STRERROR
/*************************************************
*   Fallback strerror() for non-ANSI libraries   *
*************************************************/

/* A few elderly systems (SunOS4 is one example) have no strerror() in their
C library but do export the traditional message table, so the message can be
looked up directly.

Argument:   n   the error number
Returns:    the table entry for n, or a fixed text when n lies outside the
            range 0 .. sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
int in_table = (n >= 0 && n < sys_nerr);
return in_table? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1027    
1028    
1029    /*************************************************
1030    *         JIT memory callback                    *
1031    *************************************************/
1032    
1033    static pcre_jit_stack* jit_callback(void *arg)
1034    {
1035    return (pcre_jit_stack *)arg;
1036    }
1037    
1038    
1039    #if !defined NOUTF || defined SUPPORT_PCRE16
1040    /*************************************************
1041    *            Convert UTF-8 string to value       *
1042    *************************************************/
1043    
1044    /* This function takes one or more bytes that represents a UTF-8 character,
1045    and returns the value of the character.
1046    
1047    Argument:
1048      utf8bytes   a pointer to the byte vector
1049      vptr        a pointer to an int to receive the value
1050    
1051    Returns:      >  0 => the number of bytes consumed
1052                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1053    */
1054    
1055    static int
1056    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1057    {
1058    int c = *utf8bytes++;
1059    int d = c;
1060    int i, j, s;
1061    
1062    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1063      {
1064      if ((d & 0x80) == 0) break;
1065      d <<= 1;
1066      }
1067    
1068    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1069    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1070    
1071    /* i now has a value in the range 1-5 */
1072    
1073    s = 6*i;
1074    d = (c & utf8_table3[i]) << s;
1075    
1076    for (j = 0; j < i; j++)
1077      {
1078      c = *utf8bytes++;
1079      if ((c & 0xc0) != 0x80) return -(j+1);
1080      s -= 6;
1081      d |= (c & 0x3f) << s;
1082      }
1083    
1084    /* Check that encoding was the correct unique one */
1085    
1086    for (j = 0; j < utf8_table1_size; j++)
1087      if (d <= utf8_table1[j]) break;
1088    if (j != i) return -(i+1);
1089    
1090    /* Valid value */
1091    
1092    *vptr = d;
1093    return i+1;
1094    }
1095    #endif /* NOUTF || SUPPORT_PCRE16 */
1096    
1097    
1098    
1099    #if !defined NOUTF || defined SUPPORT_PCRE16
1100    /*************************************************
1101    *       Convert character value to UTF-8         *
1102    *************************************************/
1103    
1104    /* This function takes an integer value in the range 0 - 0x7fffffff
1105    and encodes it as a UTF-8 character in 0 to 6 bytes.
1106    
1107    Arguments:
1108      cvalue     the character value
1109      utf8bytes  pointer to buffer for result - at least 6 bytes long
1110    
1111    Returns:     number of characters placed in the buffer
1112    */
1113    
1114    static int
1115    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1116    {
1117    register int i, j;
1118    for (i = 0; i < utf8_table1_size; i++)
1119      if (cvalue <= utf8_table1[i]) break;
1120    utf8bytes += i;
1121    for (j = i; j > 0; j--)
1122     {
1123     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1124     cvalue >>= 6;
1125     }
1126    *utf8bytes = utf8_table2[i] | cvalue;
1127    return i + 1;
1128    }
1129    #endif /* NOUTF || SUPPORT_PCRE16 */
1130    
1131    
1132    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is replaced by a larger block
whenever it is smaller than 2*len + 2 bytes. In non-UTF mode that is exactly
the space a 16-bit copy needs. For UTF-8 input it is also enough: values up to
0xffff occupy at most 3 bytes of UTF-8 and one 16-bit unit, while higher values
occupy 4 bytes of UTF-8 and two 16-bit units.

Surrogate values are deliberately not rejected, so that invalid UTF-16 strings
can be constructed to test that they are correctly faulted.

Patterns are either plain ASCII or UTF-8; data lines are always UTF-8 so that
values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  int c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;

    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is only acceptable in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: widen each byte as it stands. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1211    
1212    
1213    /*************************************************
1214    *        Read or extend an input line            *
1215    *************************************************/
1216    
1217    /* Input lines are read into buffer, but both patterns and data lines can be
1218    continued over multiple input lines. In addition, if the buffer fills up, we
1219    want to automatically expand it so as to be able to handle extremely large
1220    lines that are needed for certain stress tests. When the input buffer is
1221    expanded, the other two buffers must also be expanded likewise, and the
1222    contents of pbuffer, which are a copy of the input for callouts, must be
1223    preserved (for when expansion happens for a data line). This is not the most
1224    optimal way of handling this, but hey, this is just a test program!
1225    
1226    Arguments:
1227      f            the file to read
1228      start        where in buffer to start (this *must* be within buffer)
1229      prompt       for stdin or readline()
1230    
1231    Returns:       pointer to the start of new data
1232                   could be a copy of start, or could be moved
1233                   NULL if no data read and EOF reached
1234    */
1235    
1236    static pcre_uint8 *
1237    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1238    {
1239    pcre_uint8 *here = start;
1240    
1241    for (;;)
1242      {
1243      int rlen = (int)(buffer_size - (here - buffer));
1244    
1245      if (rlen > 1000)
1246        {
1247        int dlen;
1248    
1249        /* If libreadline support is required, use readline() to read a line if the
1250        input is a terminal. Note that readline() removes the trailing newline, so
1251        we must put it back again, to be compatible with fgets(). */
1252    
1253    #ifdef SUPPORT_LIBREADLINE
1254        if (isatty(fileno(f)))
1255          {
1256          size_t len;
1257          char *s = readline(prompt);
1258          if (s == NULL) return (here == start)? NULL : start;
1259          len = strlen(s);
1260          if (len > 0) add_history(s);
1261          if (len > rlen - 1) len = rlen - 1;
1262          memcpy(here, s, len);
1263          here[len] = '\n';
1264          here[len+1] = 0;
1265          free(s);
1266          }
1267        else
1268    #endif
1269    
1270        /* Read the next line by normal means, prompting if the file is stdin. */
1271    
1272          {
1273          if (f == stdin) printf("%s", prompt);
1274          if (fgets((char *)here, rlen,  f) == NULL)
1275            return (here == start)? NULL : start;
1276          }
1277    
1278        dlen = (int)strlen((char *)here);
1279        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1280        here += dlen;
1281        }
1282    
1283      else
1284        {
1285        int new_buffer_size = 2*buffer_size;
1286        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1287        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1288        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1289    
1290        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1291          {
1292          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1293          exit(1);
1294          }
1295    
1296        memcpy(new_buffer, buffer, buffer_size);
1297        memcpy(new_pbuffer, pbuffer, buffer_size);
1298    
1299        buffer_size = new_buffer_size;
1300    
1301        start = new_buffer + (start - buffer);
1302        here = new_buffer + (here - buffer);
1303    
1304        free(buffer);
1305        free(dbuffer);
1306        free(pbuffer);
1307    
1308        buffer = new_buffer;
1309        dbuffer = new_dbuffer;
1310        pbuffer = new_pbuffer;
1311        }
1312      }
1313    
1314    return NULL;  /* Control never gets here */
1315    }
1316    
1317    
1318    
1319    /*************************************************
1320    *          Read number from string               *
1321    *************************************************/
1322    
1323    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1324    around with conditional compilation, just do the job by hand. It is only used
1325    for unpicking arguments, so just keep it simple.
1326    
1327    Arguments:
1328      str           string to be converted
1329      endptr        where to put the end pointer
1330    
1331    Returns:        the unsigned long
1332    */
1333    
1334    static int
1335    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1336    {
1337    int result = 0;
1338    while(*str != 0 && isspace(*str)) str++;
1339    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1340    *endptr = str;
1341    return(result);
1342    }
1343    
1344    
1345    
1346    /*************************************************
1347    *             Print one character                *
1348    *************************************************/
1349    
1350    /* Print a single character either literally, or as a hex escape. */
1351    
1352    static int pchar(int c, FILE *f)
1353    {
1354    if (PRINTOK(c))
1355      {
1356      if (f != NULL) fprintf(f, "%c", c);
1357      return 1;
1358      }
1359    
1360    if (c < 0x100)
1361      {
1362      if (use_utf)
1363        {
1364        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1365        return 6;
1366        }
1367      else
1368        {
1369        if (f != NULL) fprintf(f, "\\x%02x", c);
1370        return 4;
1371        }
1372      }
1373    
1374    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1375    return (c <= 0x000000ff)? 6 :
1376           (c <= 0x00000fff)? 7 :
1377           (c <= 0x0000ffff)? 8 :
1378           (c <= 0x000fffff)? 9 : 10;
1379    }
1380    
1381    
1382    
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a string of bytes, decoding UTF-8 characters when in UTF mode. A
sequence that does not decode, or that would extend past the given length, is
handled one byte at a time instead.

Arguments:
  p        points to the string
  length   number of bytes, or negative to use the zero-terminated length
  f        where to print; if NULL, just count without printing

Returns:   the number of characters printed (or that would be printed)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int printed = 0;
int step;

if (length < 0)
  length = strlen((char *)p);

for (; length > 0; length -= step, p += step)
  {
  int ch = *p;
  step = 1;

#if !defined NOUTF
  if (use_utf)
    {
    int wide = 0;
    int rc = utf82ord(p, &wide);
    if (rc > 0 && rc <= length)   /* Mustn't run over the end */
      {
      step = rc;
      ch = wide;
      }
    }
#endif

  printed += pchar(ch, f);
  }

return printed;
}
#endif
1421    
1422    
1423    
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Counts the 16-bit units that precede the first zero unit.

Argument:   p   points to the string
Returns:    the number of units, excluding the terminating zero
*/

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1436    
1437    
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing.

Arguments:
  p        points to the string
  length   number of 16-bit units, or negative to use the zero-terminated
             length
  f        where to print, or NULL

Returns:   the number of characters printed (or that would be printed)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  /* A high surrogate with at least one more unit after it may start a pair. */
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* Low surrogates run from 0xDC00 to 0xDFFF inclusive, so the upper bound
    has to admit 0xDFFF itself. Otherwise the unit is left for the next
    iteration to print on its own. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1474    
1475    
1476    
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy a run of alphanumeric characters to the name list at *pp, followed by
two zero bytes, and complain on outfile if the compiled pattern has no group
of that name.

Arguments:
  p     points to the first character of the name in the input
  pp    points to the pointer to the next free slot in the name list; it is
          left pointing at the second of the two zeros
  re    the compiled pattern

Returns:  pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;

out[0] = 0;      /* terminates this name */
out[1] = 0;      /* a further zero after it */
out++;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1500    
1501    
1502    
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* The text being read is 8-bit; each alphanumeric character is widened as it
is copied to the 16-bit name list at *pp. The name is followed by two zero
units, and a complaint is written to outfile if the compiled pattern has no
group of that name.

Arguments:
  p     points to the first character of the name in the input
  pp    points to the pointer to the next free slot in the name list; it is
          left pointing at the second of the two zeros
  re    the compiled pattern

Returns:  pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;

out[0] = 0;      /* terminates this name */
out[1] = 0;      /* a further zero after it */
out++;

if (pcre16_get_stringnumber(re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1527    
1528    
1529    
1530    /*************************************************
1531    *              Callout function                  *
1532    *************************************************/
1533    
1534    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1535    the match. Yield zero unless more callouts than the fail count, or the callout
1536    data is not zero. */
1537    
1538    static int callout(pcre_callout_block *cb)
1539    {
1540    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1541    int i, pre_start, post_start, subject_length;
1542    
1543    if (callout_extra)
1544      {
1545      fprintf(f, "Callout %d: last capture = %d\n",
1546        cb->callout_number, cb->capture_last);
1547    
1548      for (i = 0; i < cb->capture_top * 2; i += 2)
1549        {
1550        if (cb->offset_vector[i] < 0)
1551          fprintf(f, "%2d: <unset>\n", i/2);
1552        else
1553          {
1554          fprintf(f, "%2d: ", i/2);
1555          PCHARSV(cb->subject, cb->offset_vector[i],
1556            cb->offset_vector[i+1] - cb->offset_vector[i], f);
1557          fprintf(f, "\n");
1558          }
1559        }
1560      }
1561    
1562  /* Use the internal info for displaying the results of pcre_study(). */  /* Re-print the subject in canonical form, the first time or if giving full
1563    datails. On subsequent calls in the same match, we use pchars just to find the
1564    printed lengths of the substrings. */
1565    
1566  #include "internal.h"  if (f != NULL) fprintf(f, "--->");
1567    
1568  /* It is possible to compile this test program without including support for  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1569  testing the POSIX interface, though this is not available via the standard  PCHARS(post_start, cb->subject, cb->start_match,
1570  Makefile. */    cb->current_position - cb->start_match, f);
1571    
1572  #if !defined NOPOSIX  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
 #include "pcreposix.h"  
 #endif  
1573    
1574  #ifndef CLOCKS_PER_SEC  PCHARSV(cb->subject, cb->current_position,
1575  #ifdef CLK_TCK    cb->subject_length - cb->current_position, f);
1576  #define CLOCKS_PER_SEC CLK_TCK  
1577    if (f != NULL) fprintf(f, "\n");
1578    
1579    /* Always print appropriate indicators, with callout number if not already
1580    shown. For automatic callouts, show the pattern offset. */
1581    
1582    if (cb->callout_number == 255)
1583      {
1584      fprintf(outfile, "%+3d ", cb->pattern_position);
1585      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1586      }
1587    else
1588      {
1589      if (callout_extra) fprintf(outfile, "    ");
1590        else fprintf(outfile, "%3d ", cb->callout_number);
1591      }
1592    
1593    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1594    fprintf(outfile, "^");
1595    
1596    if (post_start > 0)
1597      {
1598      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1599      fprintf(outfile, "^");
1600      }
1601    
1602    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1603      fprintf(outfile, " ");
1604    
1605    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1606      pbuffer + cb->pattern_position);
1607    
1608    fprintf(outfile, "\n");
1609    first_callout = 0;
1610    
1611    if (cb->mark != last_callout_mark)
1612      {
1613      if (cb->mark == NULL)
1614        fprintf(outfile, "Latest Mark: <unset>\n");
1615      else
1616        {
1617        fprintf(outfile, "Latest Mark: ");
1618        PCHARSV(cb->mark, 0, -1, outfile);
1619        putc('\n', outfile);
1620        }
1621      last_callout_mark = cb->mark;
1622      }
1623    
1624    if (cb->callout_data != NULL)
1625      {
1626      int callout_data = *((int *)(cb->callout_data));
1627      if (callout_data != 0)
1628        {
1629        fprintf(outfile, "Callout data = %d\n", callout_data);
1630        return callout_data;
1631        }
1632      }
1633    
1634    return (cb->callout_number != callout_fail_id)? 0 :
1635           (++callout_count >= callout_fail_count)? 1 : 0;
1636    }
1637    
1638    
1639    /*************************************************
1640    *            Local malloc functions              *
1641    *************************************************/
1642    
1643    /* Alternative malloc function, to test functionality and save the size of a
1644    compiled re, which is the first store request that pcre_compile() makes. The
1645    show_malloc variable is set only during matching. */
1646    
1647    static void *new_malloc(size_t size)
1648    {
1649    void *block = malloc(size);
1650    gotten_store = size;
1651    if (first_gotten_store == 0) first_gotten_store = size;
1652    if (show_malloc)
1653      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1654    return block;
1655    }
1656    
1657    static void new_free(void *block)
1658    {
1659    if (show_malloc)
1660      fprintf(outfile, "free             %p\n", block);
1661    free(block);
1662    }
1663    
1664    /* For recursion malloc/free, to test stacking calls */
1665    
1666    static void *stack_malloc(size_t size)
1667    {
1668    void *block = malloc(size);
1669    if (show_malloc)
1670      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1671    return block;
1672    }
1673    
1674    static void stack_free(void *block)
1675    {
1676    if (show_malloc)
1677      fprintf(outfile, "stack_free       %p\n", block);
1678    free(block);
1679    }
1680    
1681    
1682    /*************************************************
1683    *          Call pcre_fullinfo()                  *
1684    *************************************************/
1685    
1686    /* Get one piece of information from the pcre_fullinfo() function. When only
1687    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1688    value, but the code is defensive.
1689    
1690    Arguments:
1691      re        compiled regex
1692      study     study data
1693      option    PCRE_INFO_xxx option
1694      ptr       where to put the data
1695    
1696    Returns:    0 when OK, < 0 on error
1697    */
1698    
1699    static int
1700    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1701    {
1702    int rc;
1703    
1704    if (use_pcre16)
1705    #ifdef SUPPORT_PCRE16
1706      rc = pcre16_fullinfo(re, study, option, ptr);
1707  #else  #else
1708  #define CLOCKS_PER_SEC 100    rc = PCRE_ERROR_BADMODE;
1709  #endif  #endif
1710    else
1711    #ifdef SUPPORT_PCRE8
1712      rc = pcre_fullinfo(re, study, option, ptr);
1713    #else
1714      rc = PCRE_ERROR_BADMODE;
1715  #endif  #endif
1716    
1717  #define LOOPREPEAT 20000  if (rc < 0)
1718      {
1719      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1720        use_pcre16? "16" : "", option);
1721      if (rc == PCRE_ERROR_BADMODE)
1722        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1723          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1724      }
1725    
1726    return rc;
1727    }
1728    
 static FILE *outfile;  
 static int log_store = 0;  
 static size_t gotten_store;  
1729    
1730    
1731    /*************************************************
1732    *             Swap byte functions                *
1733    *************************************************/
1734    
1735  /* Debugging function to print the internal form of the regex. This is the same  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1736  code as contained in pcre.c under the DEBUG macro. */  value, respectively.
1737    
1738  static const char *OP_names[] = {  Arguments:
1739    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    value        any number
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
1740    
1741    Returns:       the byte swapped value
1742    */
1743    
1744  static void print_internals(pcre *re)  static pcre_uint32
1745    swap_uint32(pcre_uint32 value)
1746  {  {
1747  unsigned char *code = ((real_pcre *)re)->code;  return ((value & 0x000000ff) << 24) |
1748           ((value & 0x0000ff00) <<  8) |
1749           ((value & 0x00ff0000) >>  8) |
1750           (value >> 24);
1751    }
1752    
1753  fprintf(outfile, "------------------------------------------------------------------\n");  static pcre_uint16
1754    swap_uint16(pcre_uint16 value)
1755    {
1756    return (value >> 8) | (value << 8);
1757    }
1758    
 for(;;)  
   {  
   int c;  
   int charlength;  
1759    
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
1760    
1761    if (*code >= OP_BRA)  /*************************************************
1762      {  *        Flip bytes in a compiled pattern        *
1763      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  *************************************************/
     code += 2;  
     }  
1764    
1765    else switch(*code)  /* This function is called if the 'F' option was present on a pattern that is
1766      {  to be written to a file. We flip the bytes of all the integer fields in the
1767      case OP_END:  regex data block and the study block. In 16-bit mode this also flips relevant
1768      fprintf(outfile, "    %s\n", OP_names[*code]);  bytes in the pattern itself. This is to make it possible to test PCRE's
1769      fprintf(outfile, "------------------------------------------------------------------\n");  ability to reload byte-flipped patterns, e.g. those compiled on a different
1770      return;  architecture. */
1771    
1772      case OP_OPT:  static void
1773      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  regexflip(pcre *ere, pcre_extra *extra)
1774      code++;  {
1775      break;  real_pcre *re = (real_pcre *)ere;
1776    #ifdef SUPPORT_PCRE16
1777    int op;
1778    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1779    int length = re->name_count * re->name_entry_size;
1780    #ifdef SUPPORT_UTF
1781    BOOL utf = (re->options & PCRE_UTF16) != 0;
1782    BOOL utf16_char = FALSE;
1783    #endif /* SUPPORT_UTF */
1784    #endif /* SUPPORT_PCRE16 */
1785    
1786    /* Always flip the bytes in the main data block and study blocks. */
1787    
1788    re->magic_number = REVERSED_MAGIC_NUMBER;
1789    re->size = swap_uint32(re->size);
1790    re->options = swap_uint32(re->options);
1791    re->flags = swap_uint16(re->flags);
1792    re->top_bracket = swap_uint16(re->top_bracket);
1793    re->top_backref = swap_uint16(re->top_backref);
1794    re->first_char = swap_uint16(re->first_char);
1795    re->req_char = swap_uint16(re->req_char);
1796    re->name_table_offset = swap_uint16(re->name_table_offset);
1797    re->name_entry_size = swap_uint16(re->name_entry_size);
1798    re->name_count = swap_uint16(re->name_count);
1799    
1800      case OP_COND:  if (extra != NULL)
1801      fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);    {
1802      code += 2;    pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1803      break;    rsd->size = swap_uint32(rsd->size);
1804      rsd->flags = swap_uint32(rsd->flags);
1805      rsd->minlength = swap_uint32(rsd->minlength);
1806      }
1807    
1808      case OP_CREF:  /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1809      fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  in the name table, if present, and then in the pattern itself. */
     code++;  
     break;  
1810    
1811      case OP_CHARS:  #ifdef SUPPORT_PCRE16
1812      charlength = *(++code);  if (!use_pcre16) return;
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
1813    
1814      case OP_KETRMAX:  while(TRUE)
1815      case OP_KETRMIN:    {
1816      case OP_ALT:    /* Swap previous characters. */
1817      case OP_KET:    while (length-- > 0)
1818      case OP_ASSERT:      {
1819      case OP_ASSERT_NOT:      *ptr = swap_uint16(*ptr);
1820      case OP_ASSERTBACK:      ptr++;
1821      case OP_ASSERTBACK_NOT:      }
1822      case OP_ONCE:  #ifdef SUPPORT_UTF
1823      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);    if (utf16_char)
1824      code += 2;      {
1825      break;      if ((ptr[-1] & 0xfc00) == 0xd800)
1826          {
1827          /* We know that there is only one extra character in UTF-16. */
1828          *ptr = swap_uint16(*ptr);
1829          ptr++;
1830          }
1831        }
1832      utf16_char = FALSE;
1833    #endif /* SUPPORT_UTF */
1834    
1835      case OP_REVERSE:    /* Get next opcode. */
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
1836    
1837      length = 0;
1838      op = *ptr;
1839      *ptr++ = swap_uint16(op);
1840    
1841      switch (op)
1842        {
1843        case OP_END:
1844        return;
1845    
1846    #ifdef SUPPORT_UTF
1847        case OP_CHAR:
1848        case OP_CHARI:
1849        case OP_NOT:
1850        case OP_NOTI:
1851      case OP_STAR:      case OP_STAR:
1852      case OP_MINSTAR:      case OP_MINSTAR:
1853      case OP_PLUS:      case OP_PLUS:
1854      case OP_MINPLUS:      case OP_MINPLUS:
1855      case OP_QUERY:      case OP_QUERY:
1856      case OP_MINQUERY:      case OP_MINQUERY:
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
1857      case OP_UPTO:      case OP_UPTO:
1858      case OP_MINUPTO:      case OP_MINUPTO:
1859      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);      case OP_EXACT:
1860        else fprintf(outfile, "    \\x%02x{", c);      case OP_POSSTAR:
1861      if (*code != OP_EXACT) fprintf(outfile, ",");      case OP_POSPLUS:
1862      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_POSQUERY:
1863      if (*code == OP_MINUPTO) fprintf(outfile, "?");      case OP_POSUPTO:
1864      code += 3;      case OP_STARI:
1865      break;      case OP_MINSTARI:
1866        case OP_PLUSI:
1867      case OP_TYPEEXACT:      case OP_MINPLUSI:
1868      case OP_TYPEUPTO:      case OP_QUERYI:
1869      case OP_TYPEMINUPTO:      case OP_MINQUERYI:
1870      fprintf(outfile, "    %s{", OP_names[code[3]]);      case OP_UPTOI:
1871      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");      case OP_MINUPTOI:
1872      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_EXACTI:
1873      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");      case OP_POSSTARI:
1874      code += 3;      case OP_POSPLUSI:
1875      break;      case OP_POSQUERYI:
1876        case OP_POSUPTOI:
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
1877      case OP_NOTSTAR:      case OP_NOTSTAR:
1878      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
1879      case OP_NOTPLUS:      case OP_NOTPLUS:
1880      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
1881      case OP_NOTQUERY:      case OP_NOTQUERY:
1882      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
1883      case OP_NOTUPTO:      case OP_NOTUPTO:
1884      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
1885      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);      case OP_NOTEXACT:
1886        else fprintf(outfile, "    [^\\x%02x]{", c);      case OP_NOTPOSSTAR:
1887      if (*code != OP_NOTEXACT) fprintf(outfile, ",");      case OP_NOTPOSPLUS:
1888      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_NOTPOSQUERY:
1889      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");      case OP_NOTPOSUPTO:
1890      code += 3;      case OP_NOTSTARI:
1891      break;      case OP_NOTMINSTARI:
1892        case OP_NOTPLUSI:
1893        case OP_NOTMINPLUSI:
1894        case OP_NOTQUERYI:
1895        case OP_NOTMINQUERYI:
1896        case OP_NOTUPTOI:
1897        case OP_NOTMINUPTOI:
1898        case OP_NOTEXACTI:
1899        case OP_NOTPOSSTARI:
1900        case OP_NOTPOSPLUSI:
1901        case OP_NOTPOSQUERYI:
1902        case OP_NOTPOSUPTOI:
1903        if (utf) utf16_char = TRUE;
1904    #endif
1905        /* Fall through. */
1906    
1907      case OP_REF:      default:
1908      fprintf(outfile, "    \\%d", *(++code));      length = OP_lengths16[op] - 1;
1909      code++;      break;
     goto CLASS_REF_REPEAT;  
1910    
1911      case OP_CLASS:      case OP_CLASS:
1912        case OP_NCLASS:
1913        /* Skip the character bit map. */
1914        ptr += 32/sizeof(pcre_uint16);
1915        length = 0;
1916        break;
1917    
1918        case OP_XCLASS:
1919        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1920        if (LINK_SIZE > 1)
1921          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1922            - (1 + LINK_SIZE + 1));
1923        else
1924          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1925    
1926        /* Reverse the size of the XCLASS instance. */
1927        *ptr = swap_uint16(*ptr);
1928        ptr++;
1929        if (LINK_SIZE > 1)
1930        {        {
1931        int i, min, max;        *ptr = swap_uint16(*ptr);
1932        code++;        ptr++;
1933        fprintf(outfile, "    [");        }
1934    
1935        for (i = 0; i < 256; i++)      op = *ptr;
1936          {      *ptr = swap_uint16(op);
1937          if ((code[i/8] & (1 << (i&7))) != 0)      ptr++;
1938            {      if ((op & XCL_MAP) != 0)
1939            int j;        {
1940            for (j = i+1; j < 256; j++)        /* Skip the character bit map. */
1941              if ((code[j/8] & (1 << (j&7))) == 0) break;        ptr += 32/sizeof(pcre_uint16);
1942            if (i == '-' || i == ']') fprintf(outfile, "\\");        length -= 32/sizeof(pcre_uint16);
1943            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);        }
1944            if (--j > i)      break;
1945              {      }
1946              fprintf(outfile, "-");    }
1947              if (j == '-' || j == ']') fprintf(outfile, "\\");  /* Control should never reach here in 16 bit mode. */
1948              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  #endif /* SUPPORT_PCRE16 */
1949              }  }
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
1950    
       CLASS_REF_REPEAT:  
1951    
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
1952    
1953          case OP_CRRANGE:  /*************************************************
1954          case OP_CRMINRANGE:  *        Check match or recursion limit          *
1955          min = (code[1] << 8) + code[2];  *************************************************/
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
1956    
1957          default:  static int
1958          code--;  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1959          }    int start_offset, int options, int *use_offsets, int use_size_offsets,
1960        }    int flag, unsigned long int *limit, int errnumber, const char *msg)
1961      break;  {
1962    int count;
1963    int min = 0;
1964    int mid = 64;
1965    int max = -1;
1966    
1967      /* Anything else is just a one-node item */  extra->flags |= flag;
1968    
1969      default:  for (;;)
1970      fprintf(outfile, "    %s", OP_names[*code]);    {
1971      break;    *limit = mid;
1972    
1973      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1974        use_offsets, use_size_offsets);
1975    
1976      if (count == errnumber)
1977        {
1978        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1979        min = mid;
1980        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1981      }      }
1982    
1983    code++;    else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1984    fprintf(outfile, "\n");                           count == PCRE_ERROR_PARTIAL)
1985        {
1986        if (mid == min + 1)
1987          {
1988          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1989          break;
1990          }
1991        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1992        max = mid;
1993        mid = (min + mid)/2;
1994        }
1995      else break;    /* Some other error */
1996    }    }
1997    
1998    extra->flags &= ~flag;
1999    return count;
2000  }  }
2001    
2002    
2003    
2004  /* Character string printing function. */  /*************************************************
2005    *         Case-independent strncmp() function    *
2006    *************************************************/
2007    
2008    /*
2009    Arguments:
2010      s         first string
2011      t         second string
2012      n         number of characters to compare
2013    
2014    Returns:    < 0, = 0, or > 0, according to the comparison
2015    */
2016    
2017  static void pchars(unsigned char *p, int length)  static int
2018    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2019  {  {
2020  int c;  while (n--)
2021  while (length-- > 0)    {
2022    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    int c = tolower(*s++) - tolower(*t++);
2023      else fprintf(outfile, "\\x%02x", c);    if (c) return c;
2024      }
2025    return 0;
2026  }  }
2027    
2028    
2029    
2030  /* Alternative malloc function, to test functionality and show the size of the  /*************************************************
2031  compiled re. */  *         Check newline indicator                *
2032    *************************************************/
2033    
2034  static void *new_malloc(size_t size)  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2035    a message and return 0 if there is no match.
2036    
2037    Arguments:
2038      p           points after the leading '<'
2039      f           file for error message
2040    
2041    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2042    */
2043    
2044    static int
2045    check_newline(pcre_uint8 *p, FILE *f)
2046  {  {
2047  gotten_store = size;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2048  if (log_store)  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2049    fprintf(outfile, "Memory allocation (code space): %d\n",  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2050      (int)((int)size - offsetof(real_pcre, code[0])));  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2051  return malloc(size);  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2052    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2053    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2054    fprintf(f, "Unknown newline type at: <%s\n", p);
2055    return 0;
2056  }  }
2057    
2058    
2059    
2060    /*************************************************
2061    *             Usage function                     *
2062    *************************************************/
2063    
2064  /* Get one piece of information from the pcre_fullinfo() function */  static void
2065    usage(void)
 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  
2066  {  {
2067  int rc;  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2068  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  printf("Input and output default to stdin and stdout.\n");
2069    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  #ifdef SUPPORT_LIBREADLINE
2070    printf("If input is a terminal, readline() is used to read from it.\n");
2071    #else
2072    printf("This version of pcretest is not linked with readline().\n");
2073    #endif
2074    printf("\nOptions:\n");
2075    #ifdef SUPPORT_PCRE16
2076    printf("  -16      use 16-bit interface\n");
2077    #endif
2078    printf("  -b       show compiled code (bytecode)\n");
2079    printf("  -C       show PCRE compile-time options and exit\n");
2080    printf("  -C arg   show a specific compile-time option\n");
2081    printf("           and exit with its value. The arg can be:\n");
2082    printf("     linksize     internal link size [2, 3, 4]\n");
2083    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2084    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2085    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2086    printf("     ucp          Unicode Properties supported [0, 1]\n");
2087    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2088    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2089    printf("  -d       debug: show compiled code and information (-b and -i)\n");
2090    #if !defined NODFA
2091    printf("  -dfa     force DFA matching for all subjects\n");
2092    #endif
2093    printf("  -help    show usage information\n");
2094    printf("  -i       show information about compiled patterns\n"
2095           "  -M       find MATCH_LIMIT minimum for each subject\n"
2096           "  -m       output memory used information\n"
2097           "  -o <n>   set size of offsets vector to <n>\n");
2098    #if !defined NOPOSIX
2099    printf("  -p       use POSIX interface\n");
2100    #endif
2101    printf("  -q       quiet: do not output PCRE version number at start\n");
2102    printf("  -S <n>   set stack size to <n> megabytes\n");
2103    printf("  -s       force each pattern to be studied at basic level\n"
2104           "  -s+      force each pattern to be studied, using JIT if available\n"
2105           "  -t       time compilation and execution\n");
2106    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2107    printf("  -tm      time execution (matching) only\n");
2108    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
2109  }  }
2110    
2111    
2112    
2113    /*************************************************
2114    *                Main Program                    *
2115    *************************************************/
2116    
2117  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
2118  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 311  options, followed by a set of test data, Line 2121  options, followed by a set of test data,
2121  int main(int argc, char **argv)  int main(int argc, char **argv)
2122  {  {
2123  FILE *infile = stdin;  FILE *infile = stdin;
2124    const char *version;
2125  int options = 0;  int options = 0;
2126  int study_options = 0;  int study_options = 0;
2127    int default_find_match_limit = FALSE;
2128  int op = 1;  int op = 1;
2129  int timeit = 0;  int timeit = 0;
2130    int timeitm = 0;
2131  int showinfo = 0;  int showinfo = 0;
2132  int showstore = 0;  int showstore = 0;
2133    int force_study = -1;
2134    int force_study_options = 0;
2135    int quiet = 0;
2136    int size_offsets = 45;
2137    int size_offsets_max;
2138    int *offsets = NULL;
2139    #if !defined NOPOSIX
2140  int posix = 0;  int posix = 0;
2141    #endif
2142  int debug = 0;  int debug = 0;
2143  int done = 0;  int done = 0;
2144  unsigned char buffer[30000];  int all_use_dfa = 0;
2145  unsigned char dbuffer[1024];  int yield = 0;
2146    int stack_size;
2147    
2148    pcre_jit_stack *jit_stack = NULL;
2149    
2150    /* These vectors store, end-to-end, a list of zero-terminated captured
2151    substring names, each list itself being terminated by an empty name. Assume
2152    that 1024 is plenty long enough for the few names we'll be testing. It is
2153    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2154    for the actual memory, to ensure alignment. By defining these variables always
2155    (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2156    #ifdefs in the code. */
2157    
2158    pcre_uint16 copynames[1024];
2159    pcre_uint16 getnames[1024];
2160    
2161    pcre_uint16 *cn16ptr;
2162    pcre_uint16 *gn16ptr;
2163    
2164    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2165    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2166    pcre_uint8 *cn8ptr;
2167    pcre_uint8 *gn8ptr;
2168    
2169    /* Get buffers from malloc() so that valgrind will check their misuse when
2170    debugging. They grow automatically when very long lines are read. The 16-bit
2171    buffer (buffer16) is obtained only if needed. */
2172    
2173    buffer = (pcre_uint8 *)malloc(buffer_size);
2174    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2175    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2176    
2177  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2178    
2179  outfile = stdout;  outfile = stdout;
2180    
2181    /* The following  _setmode() stuff is some Windows magic that tells its runtime
2182    library to translate CRLF into a single LF character. At least, that's what
2183    I've been told: never having used Windows I take this all on trust. Originally
2184    it set 0x8000, but then I was advised that _O_BINARY was better. */
2185    
2186    #if defined(_WIN32) || defined(WIN32)
2187    _setmode( _fileno( stdout ), _O_BINARY );
2188    #endif
2189    
2190    /* Get the version number: both pcre_version() and pcre16_version() give the
2191    same answer. We just need to ensure that we call one that is available. */
2192    
2193    #ifdef SUPPORT_PCRE8
2194    version = pcre_version();
2195    #else
2196    version = pcre16_version();
2197    #endif
2198    
2199  /* Scan options */  /* Scan options */
2200    
2201  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2202    {    {
2203    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    pcre_uint8 *endptr;
2204      showstore = 1;  
2205    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    if (strcmp(argv[op], "-m") == 0) showstore = 1;
2206      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2207      else if (strcmp(argv[op], "-s+") == 0)
2208        {
2209        force_study = 1;
2210        force_study_options = PCRE_STUDY_JIT_COMPILE;
2211        }
2212      else if (strcmp(argv[op], "-16") == 0)
2213        {
2214    #ifdef SUPPORT_PCRE16
2215        use_pcre16 = 1;
2216    #else
2217        printf("** This version of PCRE was built without 16-bit support\n");
2218        exit(1);
2219    #endif
2220        }
2221      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2222      else if (strcmp(argv[op], "-b") == 0) debug = 1;
2223    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2224    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2225      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2226    #if !defined NODFA
2227      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2228    #endif
2229      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2230          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2231            *endptr == 0))
2232        {
2233        op++;
2234        argc--;
2235        }
2236      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2237        {
2238        int both = argv[op][2] == 0;
2239        int temp;
2240        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2241                         *endptr == 0))
2242          {
2243          timeitm = temp;
2244          op++;
2245          argc--;
2246          }
2247        else timeitm = LOOPREPEAT;
2248        if (both) timeit = timeitm;
2249        }
2250      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2251          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2252            *endptr == 0))
2253        {
2254    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2255        printf("PCRE: -S not supported on this OS\n");
2256        exit(1);
2257    #else
2258        int rc;
2259        struct rlimit rlim;
2260        getrlimit(RLIMIT_STACK, &rlim);
2261        rlim.rlim_cur = stack_size * 1024 * 1024;
2262        rc = setrlimit(RLIMIT_STACK, &rlim);
2263        if (rc != 0)
2264          {
2265        printf("PCRE: setrlimit() failed with error %d\n", rc);
2266        exit(1);
2267          }
2268        op++;
2269        argc--;
2270    #endif
2271        }
2272    #if !defined NOPOSIX
2273    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
2274    #endif
2275      else if (strcmp(argv[op], "-C") == 0)
2276        {
2277        int rc;
2278        unsigned long int lrc;
2279    
2280        if (argc > 2)
2281          {
2282          if (strcmp(argv[op + 1], "linksize") == 0)
2283            {
2284            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2285            printf("%d\n", rc);
2286            yield = rc;
2287            goto EXIT;
2288            }
2289          if (strcmp(argv[op + 1], "pcre8") == 0)
2290            {
2291    #ifdef SUPPORT_PCRE8
2292            printf("1\n");
2293            yield = 1;
2294    #else
2295            printf("0\n");
2296            yield = 0;
2297    #endif
2298            goto EXIT;
2299            }
2300          if (strcmp(argv[op + 1], "pcre16") == 0)
2301            {
2302    #ifdef SUPPORT_PCRE16
2303            printf("1\n");
2304            yield = 1;
2305    #else
2306            printf("0\n");
2307            yield = 0;
2308    #endif
2309            goto EXIT;
2310            }
2311          if (strcmp(argv[op + 1], "utf") == 0)
2312            {
2313    #ifdef SUPPORT_PCRE8
2314            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2315            printf("%d\n", rc);
2316            yield = rc;
2317    #else
2318            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2319            printf("%d\n", rc);
2320            yield = rc;
2321    #endif
2322            goto EXIT;
2323            }
2324          if (strcmp(argv[op + 1], "ucp") == 0)
2325            {
2326            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2327            printf("%d\n", rc);
2328            yield = rc;
2329            goto EXIT;
2330            }
2331          if (strcmp(argv[op + 1], "jit") == 0)
2332            {
2333            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2334            printf("%d\n", rc);
2335            yield = rc;
2336            goto EXIT;
2337            }
2338          if (strcmp(argv[op + 1], "newline") == 0)
2339            {
2340            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2341            /* Note that these values are always the ASCII values, even
2342            in EBCDIC environments. CR is 13 and NL is 10. */
2343            printf("%s\n", (rc == 13)? "CR" :
2344              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2345              (rc == -2)? "ANYCRLF" :
2346              (rc == -1)? "ANY" : "???");
2347            goto EXIT;
2348            }
2349          printf("Unknown -C option: %s\n", argv[op + 1]);
2350          goto EXIT;
2351          }
2352    
2353        printf("PCRE version %s\n", version);
2354        printf("Compiled with\n");
2355    
2356    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2357    are set, either both UTFs are supported or both are not supported. */
2358    
2359    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2360        printf("  8-bit and 16-bit support\n");
2361        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2362        if (rc)
2363          printf("  UTF-8 and UTF-16 support\n");
2364        else
2365          printf("  No UTF-8 or UTF-16 support\n");
2366    #elif defined SUPPORT_PCRE8
2367        printf("  8-bit support only\n");
2368        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2369        printf("  %sUTF-8 support\n", rc? "" : "No ");
2370    #else
2371        printf("  16-bit support only\n");
2372        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2373        printf("  %sUTF-16 support\n", rc? "" : "No ");
2374    #endif
2375    
2376        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2377        printf("  %sUnicode properties support\n", rc? "" : "No ");
2378        (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2379        if (rc)
2380          printf("  Just-in-time compiler support\n");
2381        else
2382          printf("  No just-in-time compiler support\n");
2383        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2384        /* Note that these values are always the ASCII values, even
2385        in EBCDIC environments. CR is 13 and NL is 10. */
2386        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2387          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2388          (rc == -2)? "ANYCRLF" :
2389          (rc == -1)? "ANY" : "???");
2390        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2391        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2392                                         "all Unicode newlines");
2393        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2394        printf("  Internal link size = %d\n", rc);
2395        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2396        printf("  POSIX malloc threshold = %d\n", rc);
2397        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2398        printf("  Default match limit = %ld\n", lrc);
2399        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2400        printf("  Default recursion depth limit = %ld\n", lrc);
2401        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2402        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
2403        goto EXIT;
2404        }
2405      else if (strcmp(argv[op], "-help") == 0 ||
2406               strcmp(argv[op], "--help") == 0)
2407        {
2408        usage();
2409        goto EXIT;
2410        }
2411    else    else
2412      {      {
2413      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
2414      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
2415      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
2416             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
2417      }      }
2418    op++;    op++;
2419    argc--;    argc--;
2420    }    }
2421    
2422    /* Get the store for the offsets vector, and remember what it was */
2423    
2424    size_offsets_max = size_offsets;
2425    offsets = (int *)malloc(size_offsets_max * sizeof(int));
2426    if (offsets == NULL)
2427      {
2428      printf("** Failed to get %d bytes of memory for offsets vector\n",
2429        (int)(size_offsets_max * sizeof(int)));
2430      yield = 1;
2431      goto EXIT;
2432      }
2433    
2434  /* Sort out the input and output files */  /* Sort out the input and output files */
2435    
2436  if (argc > 1)  if (argc > 1)
2437    {    {
2438    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
2439    if (infile == NULL)    if (infile == NULL)
2440      {      {
2441      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
2442      return 1;      yield = 1;
2443        goto EXIT;
2444      }      }
2445    }    }
2446    
2447  if (argc > 2)  if (argc > 2)
2448    {    {
2449    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
2450    if (outfile == NULL)    if (outfile == NULL)
2451      {      {
2452      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
2453      return 1;      yield = 1;
2454        goto EXIT;
2455      }      }
2456    }    }
2457    
2458  /* Set alternative malloc function */  /* Set alternative malloc function */
2459    
2460    #ifdef SUPPORT_PCRE8
2461  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2462    pcre_free = new_free;
2463    pcre_stack_malloc = stack_malloc;
2464    pcre_stack_free = stack_free;
2465    #endif
2466    
2467    #ifdef SUPPORT_PCRE16
2468    pcre16_malloc = new_malloc;
2469    pcre16_free = new_free;
2470    pcre16_stack_malloc = stack_malloc;
2471    pcre16_stack_free = stack_free;
2472    #endif
2473    
2474  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2475    
2476  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2477    
2478  /* Main loop */  /* Main loop */
2479    
# Line 391  while (!done) Line 2484  while (!done)
2484    
2485  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
2486    regex_t preg;    regex_t preg;
2487      int do_posix = 0;
2488  #endif  #endif
2489    
2490    const char *error;    const char *error;
2491    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2492    unsigned const char *tables = NULL;    pcre_uint8 *p, *pp, *ppp;
2493      pcre_uint8 *to_file = NULL;
2494      const pcre_uint8 *tables = NULL;
2495      unsigned long int true_size, true_study_size = 0;
2496      size_t size, regex_gotten_store;
2497      int do_allcaps = 0;
2498      int do_mark = 0;
2499    int do_study = 0;    int do_study = 0;
2500      int no_force_study = 0;
2501    int do_debug = debug;    int do_debug = debug;
2502    int do_G = 0;    int do_G = 0;
2503    int do_g = 0;    int do_g = 0;
2504    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2505    int do_showrest = 0;    int do_showrest = 0;
2506    int do_posix = 0;    int do_showcaprest = 0;
2507    int erroroffset, len, delimiter;    int do_flip = 0;
2508      int erroroffset, len, delimiter, poffset;
2509    
2510      use_utf = 0;
2511      debug_lengths = 1;
2512    
2513    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
2514    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2515      fflush(outfile);
2516    
2517    p = buffer;    p = buffer;
2518    while (isspace(*p)) p++;    while (isspace(*p)) p++;
2519    if (*p == 0) continue;    if (*p == 0) continue;
2520    
2521    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
2522    complete, read more. */  
2523      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2524        {
2525        unsigned long int magic, get_options;
2526        pcre_uint8 sbuf[8];
2527        FILE *f;
2528    
2529        p++;
2530        if (*p == '!')
2531          {
2532          do_debug = TRUE;
2533          do_showinfo = TRUE;
2534          p++;
2535          }
2536    
2537        pp = p + (int)strlen((char *)p);
2538        while (isspace(pp[-1])) pp--;
2539        *pp = 0;
2540    
2541        f = fopen((char *)p, "rb");
2542        if (f == NULL)
2543          {
2544          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2545          continue;
2546          }
2547    
2548        first_gotten_store = 0;
2549        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2550    
2551        true_size =
2552          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2553        true_study_size =
2554          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2555    
2556        re = (real_pcre *)new_malloc(true_size);
2557        regex_gotten_store = first_gotten_store;
2558    
2559        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2560    
2561        magic = ((real_pcre *)re)->magic_number;
2562        if (magic != MAGIC_NUMBER)
2563          {
2564          if (swap_uint32(magic) == MAGIC_NUMBER)
2565            {
2566            do_flip = 1;
2567            }
2568          else
2569            {
2570            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2571            fclose(f);
2572            continue;
2573            }
2574          }
2575    
2576        /* We hide the byte-invert info for little and big endian tests. */
2577        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2578          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2579    
2580        /* Now see if there is any following study data. */
2581    
2582        if (true_study_size != 0)
2583          {
2584          pcre_study_data *psd;
2585    
2586          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2587          extra->flags = PCRE_EXTRA_STUDY_DATA;
2588    
2589          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2590          extra->study_data = psd;
2591    
2592          if (fread(psd, 1, true_study_size, f) != true_study_size)
2593            {
2594            FAIL_READ:
2595            fprintf(outfile, "Failed to read data from %s\n", p);
2596            if (extra != NULL)
2597              {
2598              PCRE_FREE_STUDY(extra);
2599              }
2600            if (re != NULL) new_free(re);
2601            fclose(f);
2602            continue;
2603            }
2604          fprintf(outfile, "Study data loaded from %s\n", p);
2605          do_study = 1;     /* To get the data output if requested */
2606          }
2607        else fprintf(outfile, "No study data\n");
2608    
2609        /* Flip the necessary bytes. */
2610        if (do_flip)
2611          {
2612          int rc;
2613          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2614          if (rc == PCRE_ERROR_BADMODE)
2615            {
2616            /* Simulate the result of the function call below. */
2617            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2618              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2619            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2620              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2621            continue;
2622            }
2623          }
2624    
2625        /* Need to know if UTF-8 for printing data strings. */
2626    
2627        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2628        use_utf = (get_options & PCRE_UTF8) != 0;
2629    
2630        fclose(f);
2631        goto SHOW_INFO;
2632        }
2633    
2634      /* In-line pattern (the usual case). Get the delimiter and seek the end of
2635      the pattern; if it isn't complete, read more. */
2636    
2637    delimiter = *p++;    delimiter = *p++;
2638    
2639    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2640      {      {
2641      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2642      goto SKIP_DATA;      goto SKIP_DATA;
2643      }      }
2644    
2645    pp = p;    pp = p;
2646      poffset = (int)(p - buffer);
2647    
2648    for(;;)    for(;;)
2649      {      {
# Line 435  while (!done) Line 2654  while (!done)
2654        pp++;        pp++;
2655        }        }
2656      if (*pp != 0) break;      if (*pp != 0) break;
2657        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
2658        {        {
2659        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2660        done = 1;        done = 1;
# Line 453  while (!done) Line 2663  while (!done)
2663      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2664      }      }
2665    
2666      /* The buffer may have moved while being extended; reset the start of data
2667      pointer to the correct relative point in the buffer. */
2668    
2669      p = buffer + poffset;
2670    
2671    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
2672    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
2673    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
2674    
2675    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
2676    
2677    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
2678      for callouts. */
2679    
2680    *pp++ = 0;    *pp++ = 0;
2681      strcpy((char *)pbuffer, (char *)p);
2682    
2683    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2684    
# Line 473  while (!done) Line 2690  while (!done)
2690      {      {
2691      switch (*pp++)      switch (*pp++)
2692        {        {
2693          case 'f': options |= PCRE_FIRSTLINE; break;
2694        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
2695        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
2696        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
2697        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2698        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2699    
2700        case '+': do_showrest = 1; break;        case '+':
2701          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2702          break;
2703    
2704          case '=': do_allcaps = 1; break;
2705        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2706          case 'B': do_debug = 1; break;
2707          case 'C': options |= PCRE_AUTO_CALLOUT; break;
2708        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
2709        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2710          case 'F': do_flip = 1; break;
2711        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2712        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2713          case 'J': options |= PCRE_DUPNAMES; break;
2714          case 'K': do_mark = 1; break;
2715        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2716          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2717    
2718  #if !defined NOPOSIX  #if !defined NOPOSIX
2719        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2720  #endif  #endif
2721    
2722        case 'S': do_study = 1; break;        case 'S':
2723          if (do_study == 0)
2724            {
2725            do_study = 1;
2726            if (*pp == '+')
2727              {
2728              study_options |= PCRE_STUDY_JIT_COMPILE;
2729              pp++;
2730              }
2731            }
2732          else
2733            {
2734            do_study = 0;
2735            no_force_study = 1;
2736            }
2737          break;
2738    
2739        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2740          case 'W': options |= PCRE_UCP; break;
2741        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2742          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2743          case 'Z': debug_lengths = 0; break;
2744          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2745          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2746    
2747          case 'T':
2748          switch (*pp++)
2749            {
2750            case '0': tables = tables0; break;
2751            case '1': tables = tables1; break;
2752    
2753            case '\r':
2754            case '\n':
2755            case ' ':
2756            case 0:
2757            fprintf(outfile, "** Missing table number after /T\n");
2758            goto SKIP_DATA;
2759    
2760            default:
2761            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2762            goto SKIP_DATA;
2763            }
2764          break;
2765    
2766        case 'L':        case 'L':
2767        ppp = pp;        ppp = pp;
2768        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
2769          /* The zero (NUL) test is just in case this is an unterminated line. */
2770          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2771        *ppp = 0;        *ppp = 0;
2772        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2773          {          {
2774          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2775          goto SKIP_DATA;          goto SKIP_DATA;
2776          }          }
2777        tables = pcre_maketables();        locale_set = 1;
2778          tables = PCRE_MAKETABLES;
2779        pp = ppp;        pp = ppp;
2780        break;        break;
2781    
2782        case '\n': case ' ': break;        case '>':
2783          to_file = pp;
2784          while (*pp != 0) pp++;
2785          while (isspace(pp[-1])) pp--;
2786          *pp = 0;
2787          break;
2788    
2789          case '<':
2790            {
2791            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2792              {
2793              options |= PCRE_JAVASCRIPT_COMPAT;
2794              pp += 3;
2795              }
2796            else
2797              {
2798              int x = check_newline(pp, outfile);
2799              if (x == 0) goto SKIP_DATA;
2800              options |= x;
2801              while (*pp++ != '>');
2802              }
2803            }
2804          break;
2805    
2806          case '\r':                      /* So that it works in Windows */
2807          case '\n':
2808          case ' ':
2809          break;
2810    
2811        default:        default:
2812        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2813        goto SKIP_DATA;        goto SKIP_DATA;
# Line 517  while (!done) Line 2816  while (!done)
2816    
2817    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2818    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2819    local character tables. */    local character tables. Neither does it have 16-bit support. */
2820    
2821  #if !defined NOPOSIX  #if !defined NOPOSIX
2822    if (posix || do_posix)    if (posix || do_posix)
2823      {      {
2824      int rc;      int rc;
2825      int cflags = 0;      int cflags = 0;
2826    
2827      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2828      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2829        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2830        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2831        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2832        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2833        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2834    
2835        first_gotten_store = 0;
2836      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2837    
2838      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 533  while (!done) Line 2840  while (!done)
2840    
2841      if (rc != 0)      if (rc != 0)
2842        {        {
2843        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2844        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2845        goto SKIP_DATA;        goto SKIP_DATA;
2846        }        }
# Line 545  while (!done) Line 2852  while (!done)
2852  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2853    
2854      {      {
2855      if (timeit)      unsigned long int get_options;
2856    
2857        /* In 16-bit mode, convert the input. */
2858    
2859    #ifdef SUPPORT_PCRE16
2860        if (use_pcre16)
2861          {
2862          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2863            {
2864            case -1:
2865            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2866              "converted to UTF-16\n");
2867            goto SKIP_DATA;
2868    
2869            case -2:
2870            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2871              "cannot be converted to UTF-16\n");
2872            goto SKIP_DATA;
2873    
2874            case -3: /* "Impossible error" when to16 is called with arg1 FALSE */
2875            fprintf(outfile, "**Failed: character value greater than 0xffff "
2876              "cannot be converted to 16-bit in non-UTF mode\n");
2877            goto SKIP_DATA;
2878    
2879            default:
2880            break;
2881            }
2882          p = (pcre_uint8 *)buffer16;
2883          }
2884    #endif
2885    
2886        /* Compile many times when timing */
2887    
2888        if (timeit > 0)
2889        {        {
2890        register int i;        register int i;
2891        clock_t time_taken;        clock_t time_taken;
2892        clock_t start_time = clock();        clock_t start_time = clock();
2893        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
2894          {          {
2895          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2896          if (re != NULL) free(re);          if (re != NULL) free(re);
2897          }          }
2898        time_taken = clock() - start_time;        time_taken = clock() - start_time;
2899        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
2900          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
2901          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
2902        }        }
2903    
2904      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2905        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2906    
2907      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2908      if non-interactive. */      if non-interactive. */
# Line 574  while (!done) Line 2915  while (!done)
2915          {          {
2916          for (;;)          for (;;)
2917            {            {
2918            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2919              {              {
2920              done = 1;              done = 1;
2921              goto CONTINUE;              goto CONTINUE;
# Line 588  while (!done) Line 2929  while (!done)
2929        goto CONTINUE;        goto CONTINUE;
2930        }        }
2931    
2932      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2933      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
2934      returns only limited data. Check that it agrees with the newer one. */      lines. */
2935    
2936      if (do_showinfo)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2937          goto SKIP_DATA;
2938        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2939    
2940        /* Extract the size for possible writing before possibly flipping it,
2941        and remember the store that was got. */
2942    
2943        true_size = ((real_pcre *)re)->size;
2944        regex_gotten_store = first_gotten_store;
2945    
2946        /* Output code size information if requested */
2947    
2948        if (log_store)
2949          fprintf(outfile, "Memory allocation (code space): %d\n",
2950            (int)(first_gotten_store -
2951                  sizeof(real_pcre) -
2952                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2953    
2954        /* If -s or /S was present, study the regex to generate additional info to
2955        help with the matching, unless the pattern has the SS option, which
2956        suppresses the effect of /S (used for a few test patterns where studying is
2957        never sensible). */
2958    
2959        if (do_study || (force_study >= 0 && !no_force_study))
2960        {        {
2961        int old_first_char, old_options, old_count;        if (timeit > 0)
2962        int count, backrefmax, first_char, need_char;          {
2963        size_t size;          register int i;
2964            clock_t time_taken;
2965        if (do_debug) print_internals(re);          clock_t start_time = clock();
2966            for (i = 0; i < timeit; i++)
2967        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);            {
2968        new_info(re, NULL, PCRE_INFO_SIZE, &size);            PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2969        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            }
2970        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);          time_taken = clock() - start_time;
2971        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);          if (extra != NULL)
2972        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            {
2973              PCRE_FREE_STUDY(extra);
2974        old_count = pcre_info(re, &old_options, &old_first_char);            }
2975        if (count < 0) fprintf(outfile,          fprintf(outfile, "  Study time %.4f milliseconds\n",
2976          "Error %d from pcre_info()\n", count);            (((double)time_taken * 1000.0) / (double)timeit) /
2977        else              (double)CLOCKS_PER_SEC);
2978            }
2979          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2980          if (error != NULL)
2981            fprintf(outfile, "Failed to study: %s\n", error);
2982          else if (extra != NULL)
2983          {          {
2984          if (old_count != count) fprintf(outfile,          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2985            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,          if (log_store)
2986              old_count);            {
2987              size_t jitsize;
2988              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
2989                  jitsize != 0)
2990                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2991              }
2992            }
2993          }
2994    
2995          if (old_first_char != first_char) fprintf(outfile,      /* If /K was present, we set up for handling MARK data. */
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
2996    
2997          if (old_options != options) fprintf(outfile,      if (do_mark)
2998            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,        {
2999              old_options);        if (extra == NULL)
3000            {
3001            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3002            extra->flags = 0;
3003          }          }
3004          extra->mark = &markptr;
3005          extra->flags |= PCRE_EXTRA_MARK;
3006          }
3007    
3008        if (size != gotten_store) fprintf(outfile,      /* Extract and display information from the compiled data if required. */
3009    
3010        SHOW_INFO:
3011    
3012        if (do_debug)
3013          {
3014          fprintf(outfile, "------------------------------------------------------------------\n");
3015          PCRE_PRINTINT(re, outfile, debug_lengths);
3016          }
3017    
3018        /* We already have the options in get_options (see above) */
3019    
3020        if (do_showinfo)
3021          {
3022          unsigned long int all_options;
3023          int count, backrefmax, first_char, need_char, okpartial, jchanged,
3024            hascrorlf;
3025          int nameentrysize, namecount;
3026          const pcre_uint8 *nametable;
3027    
3028          if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3029              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3030              new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3031              new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3032              new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3033              new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3034              new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3035              new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3036              new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3037              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3038              new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3039              != 0)
3040            goto SKIP_DATA;
3041    
3042          if (size != regex_gotten_store) fprintf(outfile,
3043          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3044          size, gotten_store);          (int)size, (int)regex_gotten_store);
3045    
3046        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
3047        if (backrefmax > 0)        if (backrefmax > 0)
3048          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
       if (options == 0) fprintf(outfile, "No options\n");  
         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
           ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
           ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
           ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
           ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
           ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
           ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
           ((options & PCRE_EXTRA) != 0)? " extra" : "",  
           ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
3049    
3050        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (namecount > 0)
3051          fprintf(outfile, "Case state changes\n");          {
3052            fprintf(outfile, "Named capturing subpatterns:\n");
3053            while (namecount-- > 0)
3054              {
3055    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3056              int imm2_size = use_pcre16 ? 1 : 2;
3057    #else
3058              int imm2_size = IMM2_SIZE;
3059    #endif
3060              int length = (int)STRLEN(nametable + imm2_size);
3061              fprintf(outfile, "  ");
3062              PCHARSV(nametable, imm2_size, length, outfile);
3063              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3064    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3065              fprintf(outfile, "%3d\n", use_pcre16?
3066                 (int)(((PCRE_SPTR16)nametable)[0])
3067                :((int)nametable[0] << 8) | (int)nametable[1]);
3068              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3069    #else
3070              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3071    #ifdef SUPPORT_PCRE8
3072              nametable += nameentrysize;
3073    #else
3074              nametable += nameentrysize * 2;
3075    #endif
3076    #endif
3077              }
3078            }
3079    
3080          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3081          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3082    
3083          all_options = ((real_pcre *)re)->options;
3084          if (do_flip) all_options = swap_uint32(all_options);
3085    
3086          if (get_options == 0) fprintf(outfile, "No options\n");
3087            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3088              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3089              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3090              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3091              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3092              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3093              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3094              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3095              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3096              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3097              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3098              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3099              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3100              ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3101              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3102              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3103              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3104              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3105    
3106          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3107    
3108          switch (get_options & PCRE_NEWLINE_BITS)
3109            {
3110            case PCRE_NEWLINE_CR:
3111            fprintf(outfile, "Forced newline sequence: CR\n");
3112            break;
3113    
3114            case PCRE_NEWLINE_LF:
3115            fprintf(outfile, "Forced newline sequence: LF\n");
3116            break;
3117    
3118            case PCRE_NEWLINE_CRLF:
3119            fprintf(outfile, "Forced newline sequence: CRLF\n");
3120            break;
3121    
3122            case PCRE_NEWLINE_ANYCRLF:
3123            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3124            break;
3125    
3126            case PCRE_NEWLINE_ANY:
3127            fprintf(outfile, "Forced newline sequence: ANY\n");
3128            break;
3129    
3130            default:
3131            break;
3132            }
3133    
3134        if (first_char == -1)        if (first_char == -1)
3135          {          {
3136          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
3137          }          }
3138        else if (first_char < 0)        else if (first_char < 0)
3139          {          {
# Line 656  while (!done) Line 3141  while (!done)
3141          }          }
3142        else        else
3143          {          {
3144          if (isprint(first_char))          const char *caseless =
3145            fprintf(outfile, "First char = \'%c\'\n", first_char);            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3146              "" : " (caseless)";
3147    
3148            if (PRINTOK(first_char))
3149              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3150          else          else
3151            fprintf(outfile, "First char = %d\n", first_char);            {
3152              fprintf(outfile, "First char = ");
3153              pchar(first_char, outfile);
3154              fprintf(outfile, "%s\n", caseless);
3155              }
3156          }          }
3157    
3158        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 3161  while (!done)
3161          }          }
3162        else        else
3163          {          {
3164          if (isprint(need_char))          const char *caseless =
3165            fprintf(outfile, "Need char = \'%c\'\n", need_char);            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3166              "" : " (caseless)";
3167    
3168            if (PRINTOK(need_char))
3169              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3170            else
3171              {
3172              fprintf(outfile, "Need char = ");
3173              pchar(need_char, outfile);
3174              fprintf(outfile, "%s\n", caseless);
3175              }
3176            }
3177    
3178          /* Don't output study size; at present it is in any case a fixed
3179          value, but it varies, depending on the computer architecture, and
3180          so messes up the test suite. (And with the /F option, it might be
3181          flipped.) If study was forced by an external -s, don't show this
3182          information unless -i or -d was also present. This means that, except
3183          when auto-callouts are involved, the output from runs with and without
3184          -s should be identical. */
3185    
3186          if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3187            {
3188            if (extra == NULL)
3189              fprintf(outfile, "Study returned NULL\n");
3190          else          else
3191            fprintf(outfile, "Need char = %d\n", need_char);            {
3192              pcre_uint8 *start_bits = NULL;
3193              int minlength;
3194    
3195              if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3196                fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3197    
3198              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3199                {
3200                if (start_bits == NULL)
3201                  fprintf(outfile, "No set of starting bytes\n");
3202                else
3203                  {
3204                  int i;
3205                  int c = 24;
3206                  fprintf(outfile, "Starting byte set: ");
3207                  for (i = 0; i < 256; i++)
3208                    {
3209                    if ((start_bits[i/8] & (1<<(i&7))) != 0)
3210                      {
3211                      if (c > 75)
3212                        {
3213                        fprintf(outfile, "\n  ");
3214                        c = 2;
3215                        }
3216                      if (PRINTOK(i) && i != ' ')
3217                        {
3218                        fprintf(outfile, "%c ", i);
3219                        c += 2;
3220                        }
3221                      else
3222                        {
3223                        fprintf(outfile, "\\x%02x ", i);
3224                        c += 5;
3225                        }
3226                      }
3227                    }
3228                  fprintf(outfile, "\n");
3229                  }
3230                }
3231              }
3232    
3233            /* Show this only if the JIT was set by /S, not by -s. */
3234    
3235            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3236              {
3237              int jit;
3238              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3239                {
3240                if (jit)
3241                  fprintf(outfile, "JIT study was successful\n");
3242                else
3243    #ifdef SUPPORT_JIT
3244                  fprintf(outfile, "JIT study was not successful\n");
3245    #else
3246                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3247    #endif
3248                }
3249              }
3250          }          }
3251        }        }
3252    
3253      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
3254      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
3255        the study length, in big-endian order. */
3256    
3257      if (do_study)      if (to_file != NULL)
3258        {        {
3259        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
3260          if (f == NULL)
3261          {          {
3262          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
3263          }          }
3264          else
3265            {
3266            pcre_uint8 sbuf[8];
3267    
3268        extra = pcre_study(re, study_options, &error);          if (do_flip) regexflip(re, extra);
3269        if (error != NULL)          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3270          fprintf(outfile, "Failed to study: %s\n", error);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3271        else if (extra == NULL)          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3272          fprintf(outfile, "Study returned NULL\n");          sbuf[3] = (pcre_uint8)((true_size) & 255);
3273            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3274            sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3275            sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3276            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3277    
3278        else if (do_showinfo)          if (fwrite(sbuf, 1, 8, f) < 8 ||
3279          {              fwrite(re, 1, true_size, f) < true_size)
3280          uschar *start_bits = NULL;            {
3281          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3282          if (start_bits == NULL)            }
           fprintf(outfile, "No starting character set\n");  
3283          else          else
3284            {            {
3285            int i;            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3286            int c = 24;  
3287            fprintf(outfile, "Starting character set: ");            /* If there is study data, write it. */
3288            for (i = 0; i < 256; i++)  
3289              if (extra != NULL)
3290              {              {
3291              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
3292                    true_study_size)
3293                {                {
3294                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
3295                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
3296                }                }
3297                else fprintf(outfile, "Study data written to %s\n", to_file);
3298              }              }
           fprintf(outfile, "\n");  
3299            }            }
3300            fclose(f);
3301            }
3302    
3303          new_free(re);
3304          if (extra != NULL)
3305            {
3306            PCRE_FREE_STUDY(extra);
3307            }
3308          if (locale_set)
3309            {
3310            new_free((void *)tables);
3311            setlocale(LC_CTYPE, "C");
3312            locale_set = 0;
3313          }          }
3314          continue;  /* With next regex */
3315        }        }
3316      }      }        /* End of non-POSIX compile */
3317    
3318    /* Read data lines and test them */    /* Read data lines and test them */
3319    
3320    for (;;)    for (;;)
3321      {      {
3322      unsigned char *q;      pcre_uint8 *q;
3323      unsigned char *bptr = dbuffer;      pcre_uint8 *bptr;
3324        int *use_offsets = offsets;
3325        int use_size_offsets = size_offsets;
3326        int callout_data = 0;
3327        int callout_data_set = 0;
3328      int count, c;      int count, c;
3329      int copystrings = 0;      int copystrings = 0;
3330        int find_match_limit = default_find_match_limit;
3331      int getstrings = 0;      int getstrings = 0;
3332      int getlist = 0;      int getlist = 0;
3333      int gmatched = 0;      int gmatched = 0;
3334      int start_offset = 0;      int start_offset = 0;
3335        int start_offset_sign = 1;
3336      int g_notempty = 0;      int g_notempty = 0;
3337      int offsets[45];      int use_dfa = 0;
     int size_offsets = sizeof(offsets)/sizeof(int);  
3338    
3339        *copynames = 0;
3340        *getnames = 0;
3341    
3342        cn16ptr = copynames;
3343        gn16ptr = getnames;
3344        cn8ptr = copynames8;
3345        gn8ptr = getnames8;
3346    
3347        SET_PCRE_CALLOUT(callout);
3348        first_callout = 1;
3349        last_callout_mark = NULL;
3350        callout_extra = 0;
3351        callout_count = 0;
3352        callout_fail_count = 999999;
3353        callout_fail_id = -1;
3354        show_malloc = 0;
3355      options = 0;      options = 0;
3356    
3357      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
3358      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3359    
3360        len = 0;
3361        for (;;)
3362        {        {
3363        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3364        goto CONTINUE;          {
3365            if (len > 0)    /* Reached EOF without hitting a newline */
3366              {
3367              fprintf(outfile, "\n");
3368              break;
3369              }
3370            done = 1;
3371            goto CONTINUE;
3372            }
3373          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3374          len = (int)strlen((char *)buffer);
3375          if (buffer[len-1] == '\n') break;
3376        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
3377    
     len = (int)strlen((char *)buffer);  
3378      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
3379      buffer[len] = 0;      buffer[len] = 0;
3380      if (len == 0) break;      if (len == 0) break;
# Line 772  while (!done) Line 3382  while (!done)
3382      p = buffer;      p = buffer;
3383      while (isspace(*p)) p++;      while (isspace(*p)) p++;
3384    
3385      q = dbuffer;      bptr = q = dbuffer;
3386      while ((c = *p++) != 0)      while ((c = *p++) != 0)
3387        {        {
3388        int i = 0;        int i = 0;
3389        int n = 0;        int n = 0;
3390        if (c == '\\') switch ((c = *p++))  
3391          /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3392          In non-UTF mode, allow the value of the byte to fall through to later,
3393          where values greater than 127 are turned into UTF-8 when running in
3394          16-bit mode. */
3395    
3396          if (c != '\\')
3397            {
3398            if (use_utf)
3399              {
3400              *q++ = c;
3401              continue;
3402              }
3403            }
3404    
3405          /* Handle backslash escapes */
3406    
3407          else switch ((c = *p++))
3408          {          {
3409          case 'a': c =    7; break;          case 'a': c =    7; break;
3410          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 795  while (!done) Line 3422  while (!done)
3422            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
3423          break;          break;
3424    
3425          case 'x':          case 'x':
3426            if (*p == '{')
3427              {
3428              pcre_uint8 *pt = p;
3429              c = 0;
3430    
3431              /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3432              when isxdigit() is a macro that refers to its argument more than
3433              once. This is banned by the C Standard, but apparently happens in at
3434              least one MacOS environment. */
3435    
3436              for (pt++; isxdigit(*pt); pt++)
3437                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3438              if (*pt == '}')
3439                {
3440                p = pt + 1;
3441                break;
3442                }
3443              /* Not correct form for \x{...}; fall through */
3444              }
3445    
3446            /* \x without {} always defines just one byte in 8-bit mode. This
3447            allows UTF-8 characters to be constructed byte by byte, and also allows
3448            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3449            Otherwise, pass it down to later code so that it can be turned into
3450            UTF-8 when running in 16-bit mode. */
3451    
3452          c = 0;          c = 0;
3453          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3454            {            {
3455            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3456            p++;            p++;
3457            }            }
3458            if (use_utf)
3459              {
3460              *q++ = c;
3461              continue;
3462              }
3463          break;          break;
3464    
3465          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
3466          p--;          p--;
3467          continue;          continue;
3468    
3469            case '>':
3470            if (*p == '-')
3471              {
3472              start_offset_sign = -1;
3473              p++;
3474              }
3475            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3476            start_offset *= start_offset_sign;
3477            continue;
3478    
3479          case 'A':  /* Option setting */          case 'A':  /* Option setting */
3480          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
3481          continue;          continue;
# Line 817  while (!done) Line 3485  while (!done)
3485          continue;          continue;
3486    
3487          case 'C':          case 'C':
3488          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
3489          copystrings |= 1 << n;            {
3490              while(isdigit(*p)) n = n * 10 + *p++ - '0';
3491              copystrings |= 1 << n;
3492              }
3493            else if (isalnum(*p))
3494              {
3495              READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3496              }
3497            else if (*p == '+')
3498              {
3499              callout_extra = 1;
3500              p++;
3501              }
3502            else if (*p == '-')
3503              {
3504              SET_PCRE_CALLOUT(NULL);
3505              p++;
3506              }
3507            else if (*p == '!')
3508              {
3509              callout_fail_id = 0;
3510              p++;
3511              while(isdigit(*p))
3512                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3513              callout_fail_count = 0;
3514              if (*p == '!')
3515                {
3516                p++;
3517                while(isdigit(*p))
3518                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3519                }
3520              }
3521            else if (*p == '*')
3522              {
3523              int sign = 1;
3524              callout_data = 0;
3525              if (*(++p) == '-') { sign = -1; p++; }
3526              while(isdigit(*p))
3527                callout_data = callout_data * 10 + *p++ - '0';
3528              callout_data *= sign;
3529              callout_data_set = 1;
3530              }
3531            continue;
3532    
3533    #if !defined NODFA
3534            case 'D':
3535    #if !defined NOPOSIX
3536            if (posix || do_posix)
3537              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3538            else
3539    #endif
3540              use_dfa = 1;
3541            continue;
3542    #endif
3543    
3544    #if !defined NODFA
3545            case 'F':
3546            options |= PCRE_DFA_SHORTEST;
3547          continue;          continue;
3548    #endif
3549    
3550          case 'G':          case 'G':
3551            if (isdigit(*p))
3552              {
3553              while(isdigit(*p)) n = n * 10 + *p++ - '0';
3554              getstrings |= 1 << n;
3555              }
3556            else if (isalnum(*p))
3557              {
3558              READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3559              }
3560            continue;
3561    
3562</