/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 57 by nigel, Sat Feb 24 21:39:50 2007 UTC revision 836 by ph10, Wed Dec 28 17:16:11 2011 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50    #ifdef HAVE_CONFIG_H
51    #include "config.h"
52    #endif
53    
54  #include <ctype.h>  #include <ctype.h>
55  #include <stdio.h>  #include <stdio.h>
56  #include <string.h>  #include <string.h>
57  #include <stdlib.h>  #include <stdlib.h>
58  #include <time.h>  #include <time.h>
59  #include <locale.h>  #include <locale.h>
60    #include <errno.h>
61    
62    #ifdef SUPPORT_LIBREADLINE
63    #ifdef HAVE_UNISTD_H
64    #include <unistd.h>
65    #endif
66    #include <readline/readline.h>
67    #include <readline/history.h>
68    #endif
69    
70    
71    /* A number of things vary for Windows builds. Originally, pcretest opened its
72    input and output without "b"; then I was told that "b" was needed in some
73    environments, so it was added for release 5.0 to both the input and output. (It
74    makes no difference on Unix-like systems.) Later I was told that it is wrong
75    for the input on Windows. I've now abstracted the modes into two macros that
76    are set here, to make it easier to fiddle with them, and removed "b" from the
77    input mode under Windows. */
78    
79    #if defined(_WIN32) || defined(WIN32)
80    #include <io.h>                /* For _setmode() */
81    #include <fcntl.h>             /* For _O_BINARY */
82    #define INPUT_MODE   "r"
83    #define OUTPUT_MODE  "wb"
84    
85    #ifndef isatty
86    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
87    #endif                         /* though in some environments they seem to   */
88                                   /* be already defined, hence the #ifndefs.    */
89    #ifndef fileno
90    #define fileno _fileno
91    #endif
92    
93    /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95    #ifdef __BORLANDC__
96    #define _setmode(handle, mode) setmode(handle, mode)
97    #endif
98    
99    /* Not Windows */
100    
101  /* Use the internal info for displaying the results of pcre_study(). */  #else
102    #include <sys/time.h>          /* These two includes are needed */
103    #include <sys/resource.h>      /* for setrlimit(). */
104    #define INPUT_MODE   "rb"
105    #define OUTPUT_MODE  "wb"
106    #endif
107    
108    
109    /* We have to include pcre_internal.h because we need the internal info for
110    displaying the results of pcre_study() and we also need to know about the
111    internal macros, structures, and other internal data values; pcretest has
112    "inside information" compared to a program that strictly follows the PCRE API.
113    
114    Although pcre_internal.h does itself include pcre.h, we explicitly include it
115    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116    appropriately for an application, not for building PCRE. */
117    
118    #include "pcre.h"
119    
120    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121    /* Configure internal macros to 16 bit mode. */
122    #define COMPILE_PCRE16
123    #endif
124    
125    #include "pcre_internal.h"
126    
127    /* The pcre_printint() function, which prints the internal form of a compiled
128    regex, is held in a separate file so that (a) it can be compiled in either
129    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130    when that is compiled in debug mode. */
131    
132    #ifdef SUPPORT_PCRE8
133    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134    #endif
135    #ifdef SUPPORT_PCRE16
136    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137    #endif
138    
139    /* We need access to some of the data tables that PCRE uses. So as not to have
140    to keep two copies, we include the source file here, changing the names of the
141    external symbols to prevent clashes. */
142    
143    #define PCRE_INCLUDED
144    #undef PRIV
145    #define PRIV(name) name
146    
147    #include "pcre_tables.c"
148    
149    /* The definition of the macro PRINTABLE, which determines whether to print an
150    output character as-is or as a hex value when showing compiled patterns, is
151    the same as in the printint.src file. We use it here in cases when the locale
152    has not been explicitly changed, so as to get consistent output from systems
153    that differ in their output from isprint() even in the "C" locale. */
154    
155    #ifdef EBCDIC
156    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157    #else
158    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
159    #endif
160    
161  #include "internal.h"  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162    
163    /* Posix support is disabled in 16 bit only mode. */
164    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165    #define NOPOSIX
166    #endif
167    
168  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
169  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 173  Makefile. */
173  #include "pcreposix.h"  #include "pcreposix.h"
174  #endif  #endif
175    
176    /* It is also possible, originally for the benefit of a version that was
177    imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178    NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
179    automatically cut out the UTF support if PCRE is built without it. */
180    
181    #ifndef SUPPORT_UTF
182    #ifndef NOUTF
183    #define NOUTF
184    #endif
185    #endif
186    
187    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189    only from one place and is handled differently). I couldn't dream up any way of
190    using a single macro to do this in a generic way, because of the many different
191    argument requirements. We know that at least one of SUPPORT_PCRE8 and
192    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193    use these in the definitions of generic macros.
194    
195    **** Special note about the PCHARSxxx macros: the address of the string to be
196    printed is always given as two arguments: a base address followed by an offset.
197    The base address is cast to the correct data size for 8 or 16 bit data; the
198    offset is in units of this size. If the string were given as base+offset in one
199    argument, the casting might be incorrectly applied. */
200    
201    #ifdef SUPPORT_PCRE8
202    
203    #define PCHARS8(lv, p, offset, len, f) \
204      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205    
206    #define PCHARSV8(p, offset, len, f) \
207      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208    
209    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210      p = read_capture_name8(p, cn8, re)
211    
212    #define SET_PCRE_CALLOUT8(callout) \
213      pcre_callout = callout
214    
215    #define STRLEN8(p) ((int)strlen((char *)p))
216    
217    
218    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
219      re = pcre_compile((char *)pat, options, error, erroffset, tables)
220    
221    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
222        namesptr, cbuffer, size) \
223      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
224        (char *)namesptr, cbuffer, size)
225    
226    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
227      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
228    
229    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
230        offsets, size_offsets, workspace, size_workspace) \
231      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
232        offsets, size_offsets, workspace, size_workspace)
233    
234    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
235        offsets, size_offsets) \
236      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
237        offsets, size_offsets)
238    
239    #define PCRE_FREE_STUDY8(extra) \
240      pcre_free_study(extra)
241    
242    #define PCRE_FREE_SUBSTRING8(substring) \
243      pcre_free_substring(substring)
244    
245    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
246      pcre_free_substring_list(listptr)
247    
248    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
249        getnamesptr, subsptr) \
250      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
251        (char *)getnamesptr, subsptr)
252    
253    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
254      n = pcre_get_stringnumber(re, (char *)ptr)
255    
256    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
257      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
258    
259    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
260      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
261    
262    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
263      pcre_pattern_to_host_byte_order(re, extra, tables)
264    
265    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
266      pcre_printint(re, outfile, debug_lengths)
267    
268    #define PCRE_STUDY8(extra, re, options, error) \
269      extra = pcre_study(re, options, error)
270    
271    #endif /* SUPPORT_PCRE8 */
272    
273    /* -----------------------------------------------------------*/
274    
275    #ifdef SUPPORT_PCRE16
276    
277    #define PCHARS16(lv, p, offset, len, f) \
278      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
279    
280    #define PCHARSV16(p, offset, len, f) \
281      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
282    
283    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
284      p = read_capture_name16(p, cn16, re)
285    
286    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
287    
288    #define SET_PCRE_CALLOUT16(callout) \
289      pcre16_callout = callout
290    
291    
292    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
293      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
294    
295    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
296        namesptr, cbuffer, size) \
297      rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
298        (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, size/2)
299    
300    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
301      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
302        (PCRE_SCHAR16 *)cbuffer, size/2)
303    
304    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
305        offsets, size_offsets, workspace, size_workspace) \
306      count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
307        options, offsets, size_offsets, workspace, size_workspace)
308    
309    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
310        offsets, size_offsets) \
311      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
312        options, offsets, size_offsets)
313    
314    #define PCRE_FREE_STUDY16(extra) \
315      pcre16_free_study(extra)
316    
317    #define PCRE_FREE_SUBSTRING16(substring) \
318      pcre16_free_substring((PCRE_SPTR16)substring)
319    
320    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
321      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
322    
323    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
324        getnamesptr, subsptr) \
325      rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
326        (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
327    
328    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
329      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
330    
331    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
332      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
333        (PCRE_SPTR16 *)(void*)subsptr)
334    
335    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
336      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
337        (PCRE_SPTR16 **)(void*)listptr)
338    
339    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
340      pcre16_pattern_to_host_byte_order(re, extra, tables)
341    
342    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
343      pcre16_printint(re, outfile, debug_lengths)
344    
345    #define PCRE_STUDY16(extra, re, options, error) \
346      extra = pcre16_study(re, options, error)
347    
348    #endif /* SUPPORT_PCRE16 */
349    
350    
351    /* ----- Both modes are supported; a runtime test is needed, except for
352    pcre_config() and the JIT stack functions, where it doesn't matter which
353    version is called. ----- */
354    
355    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
356    
357    #define CHAR_SIZE (use_pcre16? 2:1)
358    
359    #define PCHARS(lv, p, offset, len, f) \
360      if (use_pcre16) \
361        PCHARS16(lv, p, offset, len, f); \
362      else \
363        PCHARS8(lv, p, offset, len, f)
364    
365    #define PCHARSV(p, offset, len, f) \
366      if (use_pcre16) \
367        PCHARSV16(p, offset, len, f); \
368      else \
369        PCHARSV8(p, offset, len, f)
370    
371    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
372      if (use_pcre16) \
373        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
374      else \
375        READ_CAPTURE_NAME8(p, cn8, cn16, re)
376    
377    #define SET_PCRE_CALLOUT(callout) \
378      if (use_pcre16) \
379        SET_PCRE_CALLOUT16(callout); \
380      else \
381        SET_PCRE_CALLOUT8(callout)
382    
383    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
384    
385    #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
386    
387    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
388      if (use_pcre16) \
389        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
390      else \
391        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
392    
393    #define PCRE_CONFIG pcre_config
394    
395    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
396        namesptr, cbuffer, size) \
397      if (use_pcre16) \
398        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
399          namesptr, cbuffer, size); \
400      else \
401        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
402          namesptr, cbuffer, size)
403    
404    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
405      if (use_pcre16) \
406        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
407      else \
408        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
409    
410    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
411        offsets, size_offsets, workspace, size_workspace) \
412      if (use_pcre16) \
413        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
414          offsets, size_offsets, workspace, size_workspace); \
415      else \
416        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
417          offsets, size_offsets, workspace, size_workspace)
418    
419    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
420        offsets, size_offsets) \
421      if (use_pcre16) \
422        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
423          offsets, size_offsets); \
424      else \
425        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
426          offsets, size_offsets)
427    
428    #define PCRE_FREE_STUDY(extra) \
429      if (use_pcre16) \
430        PCRE_FREE_STUDY16(extra); \
431      else \
432        PCRE_FREE_STUDY8(extra)
433    
434    #define PCRE_FREE_SUBSTRING(substring) \
435      if (use_pcre16) \
436        PCRE_FREE_SUBSTRING16(substring); \
437      else \
438        PCRE_FREE_SUBSTRING8(substring)
439    
440    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
441      if (use_pcre16) \
442        PCRE_FREE_SUBSTRING_LIST16(listptr); \
443      else \
444        PCRE_FREE_SUBSTRING_LIST8(listptr)
445    
446    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
447        getnamesptr, subsptr) \
448      if (use_pcre16) \
449        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
450          getnamesptr, subsptr); \
451      else \
452        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
453          getnamesptr, subsptr)
454    
455    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
456      if (use_pcre16) \
457        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
458      else \
459        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
460    
461    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
462      if (use_pcre16) \
463        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
464      else \
465        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
466    
467    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
468      if (use_pcre16) \
469        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
470      else \
471        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
472    
473    #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
474    #define PCRE_JIT_STACK_FREE pcre_jit_stack_free
475    
476    #define PCRE_MAKETABLES \
477      (use_pcre16? pcre16_maketables() : pcre_maketables())
478    
479    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
480      if (use_pcre16) \
481        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
482      else \
483        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
484    
485    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
486      if (use_pcre16) \
487        PCRE_PRINTINT16(re, outfile, debug_lengths); \
488      else \
489        PCRE_PRINTINT8(re, outfile, debug_lengths)
490    
491    #define PCRE_STUDY(extra, re, options, error) \
492      if (use_pcre16) \
493        PCRE_STUDY16(extra, re, options, error); \
494      else \
495        PCRE_STUDY8(extra, re, options, error)
496    
497    /* ----- Only 8-bit mode is supported ----- */
498    
499    #elif defined SUPPORT_PCRE8
500    #define CHAR_SIZE                 1
501    #define PCHARS                    PCHARS8
502    #define PCHARSV                   PCHARSV8
503    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
504    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
505    #define STRLEN                    STRLEN8
506    #define PCRE_ASSIGN_JIT_STACK     pcre_assign_jit_stack
507    #define PCRE_COMPILE              PCRE_COMPILE8
508    #define PCRE_CONFIG               pcre_config
509    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
510    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
511    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
512    #define PCRE_EXEC                 PCRE_EXEC8
513    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
514    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
515    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
516    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
517    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
518    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
519    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
520    #define PCRE_JIT_STACK_ALLOC      pcre_jit_stack_alloc
521    #define PCRE_JIT_STACK_FREE       pcre_jit_stack_free
522    #define PCRE_MAKETABLES           pcre_maketables()
523    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
524    #define PCRE_PRINTINT             PCRE_PRINTINT8
525    #define PCRE_STUDY                PCRE_STUDY8
526    
527    /* ----- Only 16-bit mode is supported ----- */
528    
529    #else
530    #define CHAR_SIZE                 2
531    #define PCHARS                    PCHARS16
532    #define PCHARSV                   PCHARSV16
533    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
534    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
535    #define STRLEN                    STRLEN16
536    #define PCRE_ASSIGN_JIT_STACK     pcre16_assign_jit_stack
537    #define PCRE_COMPILE              PCRE_COMPILE16
538    #define PCRE_CONFIG               pcre16_config
539    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
540    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
541    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
542    #define PCRE_EXEC                 PCRE_EXEC16
543    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
544    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
545    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
546    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
547    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
548    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
549    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
550    #define PCRE_JIT_STACK_ALLOC      pcre16_jit_stack_alloc
551    #define PCRE_JIT_STACK_FREE       pcre16_jit_stack_free
552    #define PCRE_MAKETABLES           pcre16_maketables()
553    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
554    #define PCRE_PRINTINT             PCRE_PRINTINT16
555    #define PCRE_STUDY                PCRE_STUDY16
556    #endif
557    
558    /* ----- End of mode-specific function call macros ----- */
559    
560    
561    /* Other parameters */
562    
563  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
564  #ifdef CLK_TCK  #ifdef CLK_TCK
565  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 568  Makefile. */
568  #endif  #endif
569  #endif  #endif
570    
571  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
572    
573    #define LOOPREPEAT 500000
574    
575    /* Static variables */
576    
577  static FILE *outfile;  static FILE *outfile;
578  static int log_store = 0;  static int log_store = 0;
579    static int callout_count;
580    static int callout_extra;
581    static int callout_fail_count;
582    static int callout_fail_id;
583    static int debug_lengths;
584    static int first_callout;
585    static int locale_set = 0;
586    static int show_malloc;
587    static int use_utf;
588  static size_t gotten_store;  static size_t gotten_store;
589    static size_t first_gotten_store = 0;
590    static const unsigned char *last_callout_mark = NULL;
591    
592    /* The buffers grow automatically if very long input lines are encountered. */
593    
594    static int buffer_size = 50000;
595    static pcre_uint8 *buffer = NULL;
596    static pcre_uint8 *dbuffer = NULL;
597    static pcre_uint8 *pbuffer = NULL;
598    
599    /* Another buffer is needed for translation to 16-bit character strings. It
600    will be obtained and extended as required. */
601    
602    #ifdef SUPPORT_PCRE16
603    static int buffer16_size = 0;
604    static pcre_uint16 *buffer16 = NULL;
605    
606    #ifdef SUPPORT_PCRE8
607    
608    /* We need the table of operator lengths that is used for 16-bit compiling, in
609    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
610    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
611    appropriately for the 16-bit world. Just as a safety check, make sure that
612    COMPILE_PCRE16 is *not* set. */
613    
614  static int utf8_table1[] = {  #ifdef COMPILE_PCRE16
615    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  #error COMPILE_PCRE16 must not be set when compiling pcretest.c
616    #endif
617    
618    #if LINK_SIZE == 2
619    #undef LINK_SIZE
620    #define LINK_SIZE 1
621    #elif LINK_SIZE == 3 || LINK_SIZE == 4
622    #undef LINK_SIZE
623    #define LINK_SIZE 2
624    #else
625    #error LINK_SIZE must be either 2, 3, or 4
626    #endif
627    
628    #endif /* SUPPORT_PCRE8 */
629    
630    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
631    #endif  /* SUPPORT_PCRE16 */
632    
633    /* If we have 8-bit support, default use_pcre16 to false; if there is also
634    16-bit support, it can be changed by an option. If there is no 8-bit support,
635    there must be 16-bit support, so default it to 1. */
636    
637    #ifdef SUPPORT_PCRE8
638    static int use_pcre16 = 0;
639    #else
640    static int use_pcre16 = 1;
641    #endif
642    
643  static int utf8_table2[] = {  /* Textual explanations for runtime error codes */
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
644    
645  static int utf8_table3[] = {  static const char *errtexts[] = {
646    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};    NULL,  /* 0 is no error */
647      NULL,  /* NOMATCH is handled specially */
648      "NULL argument passed",
649      "bad option value",
650      "magic number missing",
651      "unknown opcode - pattern overwritten?",
652      "no more memory",
653      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
654      "match limit exceeded",
655      "callout error code",
656      NULL,  /* BADUTF8/16 is handled specially */
657      NULL,  /* BADUTF8/16 offset is handled specially */
658      NULL,  /* PARTIAL is handled specially */
659      "not used - internal error",
660      "internal error - pattern overwritten?",
661      "bad count value",
662      "item unsupported for DFA matching",
663      "backreference condition or recursion test not supported for DFA matching",
664      "match limit not supported for DFA matching",
665      "workspace size exceeded in DFA matching",
666      "too much recursion for DFA matching",
667      "recursion limit exceeded",
668      "not used - internal error",
669      "invalid combination of newline options",
670      "bad offset value",
671      NULL,  /* SHORTUTF8/16 is handled specially */
672      "nested recursion at the same subject position",
673      "JIT stack limit reached",
674      "pattern compiled in wrong mode: 8-bit/16-bit error"
675    };
676    
677    
678  /*************************************************  /*************************************************
679  *       Convert character value to UTF-8         *  *         Alternate character tables             *
680  *************************************************/  *************************************************/
681    
682  /* This function takes an integer value in the range 0 - 0x7fffffff  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
683  and encodes it as a UTF-8 character in 0 to 6 bytes.  using the default tables of the library. However, the T option can be used to
684    select alternate sets of tables, for different kinds of testing. Note also that
685    the L (locale) option adjusts the tables. */
686    
687    /* This is the set of tables distributed as default with PCRE. It recognizes
688    only ASCII characters. */
689    
690    static const pcre_uint8 tables0[] = {
691    
692    /* This table is a lower casing table. */
693    
694        0,  1,  2,  3,  4,  5,  6,  7,
695        8,  9, 10, 11, 12, 13, 14, 15,
696       16, 17, 18, 19, 20, 21, 22, 23,
697       24, 25, 26, 27, 28, 29, 30, 31,
698       32, 33, 34, 35, 36, 37, 38, 39,
699       40, 41, 42, 43, 44, 45, 46, 47,
700       48, 49, 50, 51, 52, 53, 54, 55,
701       56, 57, 58, 59, 60, 61, 62, 63,
702       64, 97, 98, 99,100,101,102,103,
703      104,105,106,107,108,109,110,111,
704      112,113,114,115,116,117,118,119,
705      120,121,122, 91, 92, 93, 94, 95,
706       96, 97, 98, 99,100,101,102,103,
707      104,105,106,107,108,109,110,111,
708      112,113,114,115,116,117,118,119,
709      120,121,122,123,124,125,126,127,
710      128,129,130,131,132,133,134,135,
711      136,137,138,139,140,141,142,143,
712      144,145,146,147,148,149,150,151,
713      152,153,154,155,156,157,158,159,
714      160,161,162,163,164,165,166,167,
715      168,169,170,171,172,173,174,175,
716      176,177,178,179,180,181,182,183,
717      184,185,186,187,188,189,190,191,
718      192,193,194,195,196,197,198,199,
719      200,201,202,203,204,205,206,207,
720      208,209,210,211,212,213,214,215,
721      216,217,218,219,220,221,222,223,
722      224,225,226,227,228,229,230,231,
723      232,233,234,235,236,237,238,239,
724      240,241,242,243,244,245,246,247,
725      248,249,250,251,252,253,254,255,
726    
727    /* This table is a case flipping table. */
728    
729        0,  1,  2,  3,  4,  5,  6,  7,
730        8,  9, 10, 11, 12, 13, 14, 15,
731       16, 17, 18, 19, 20, 21, 22, 23,
732       24, 25, 26, 27, 28, 29, 30, 31,
733       32, 33, 34, 35, 36, 37, 38, 39,
734       40, 41, 42, 43, 44, 45, 46, 47,
735       48, 49, 50, 51, 52, 53, 54, 55,
736       56, 57, 58, 59, 60, 61, 62, 63,
737       64, 97, 98, 99,100,101,102,103,
738      104,105,106,107,108,109,110,111,
739      112,113,114,115,116,117,118,119,
740      120,121,122, 91, 92, 93, 94, 95,
741       96, 65, 66, 67, 68, 69, 70, 71,
742       72, 73, 74, 75, 76, 77, 78, 79,
743       80, 81, 82, 83, 84, 85, 86, 87,
744       88, 89, 90,123,124,125,126,127,
745      128,129,130,131,132,133,134,135,
746      136,137,138,139,140,141,142,143,
747      144,145,146,147,148,149,150,151,
748      152,153,154,155,156,157,158,159,
749      160,161,162,163,164,165,166,167,
750      168,169,170,171,172,173,174,175,
751      176,177,178,179,180,181,182,183,
752      184,185,186,187,188,189,190,191,
753      192,193,194,195,196,197,198,199,
754      200,201,202,203,204,205,206,207,
755      208,209,210,211,212,213,214,215,
756      216,217,218,219,220,221,222,223,
757      224,225,226,227,228,229,230,231,
758      232,233,234,235,236,237,238,239,
759      240,241,242,243,244,245,246,247,
760      248,249,250,251,252,253,254,255,
761    
762    /* This table contains bit maps for various character classes. Each map is 32
763    bytes long and the bits run from the least significant end of each byte. The
764    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
765    graph, print, punct, and cntrl. Other classes are built from combinations. */
766    
767      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
768      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
769      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
770      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
771    
772      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
773      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
774      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
775      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
776    
777      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
778      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
780      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
781    
782      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
783      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
784      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
785      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
786    
787      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
788      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
789      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
790      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
791    
792      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
793      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
794      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
795      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
796    
797      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
798      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
799      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
800      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
801    
802      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
803      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
804      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806    
807      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
808      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
809      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811    
812      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
813      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
814      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816    
817    /* This table identifies various classes of character by individual bits:
818      0x01   white space character
819      0x02   letter
820      0x04   decimal digit
821      0x08   hexadecimal digit
822      0x10   alphanumeric or '_'
823      0x80   regular expression metacharacter or binary zero
824    */
825    
826  Arguments:    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
827    cvalue     the character value    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
828    buffer     pointer to buffer for result - at least 6 bytes long    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
829      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
830      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
831      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
832      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
833      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
834      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
835      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
836      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
837      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
838      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
839      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
840      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
841      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
842      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
845      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
846      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
847      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
849      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
850      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
851      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
852      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
854      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
855      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
856      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
857      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
858    
859    /* This is a set of tables that came originally from a Windows user. It seems to
860    be at least an approximation of ISO 8859. In particular, there are characters
861    greater than 128 that are marked as spaces, letters, etc. */
862    
863    static const pcre_uint8 tables1[] = {
864    0,1,2,3,4,5,6,7,
865    8,9,10,11,12,13,14,15,
866    16,17,18,19,20,21,22,23,
867    24,25,26,27,28,29,30,31,
868    32,33,34,35,36,37,38,39,
869    40,41,42,43,44,45,46,47,
870    48,49,50,51,52,53,54,55,
871    56,57,58,59,60,61,62,63,
872    64,97,98,99,100,101,102,103,
873    104,105,106,107,108,109,110,111,
874    112,113,114,115,116,117,118,119,
875    120,121,122,91,92,93,94,95,
876    96,97,98,99,100,101,102,103,
877    104,105,106,107,108,109,110,111,
878    112,113,114,115,116,117,118,119,
879    120,121,122,123,124,125,126,127,
880    128,129,130,131,132,133,134,135,
881    136,137,138,139,140,141,142,143,
882    144,145,146,147,148,149,150,151,
883    152,153,154,155,156,157,158,159,
884    160,161,162,163,164,165,166,167,
885    168,169,170,171,172,173,174,175,
886    176,177,178,179,180,181,182,183,
887    184,185,186,187,188,189,190,191,
888    224,225,226,227,228,229,230,231,
889    232,233,234,235,236,237,238,239,
890    240,241,242,243,244,245,246,215,
891    248,249,250,251,252,253,254,223,
892    224,225,226,227,228,229,230,231,
893    232,233,234,235,236,237,238,239,
894    240,241,242,243,244,245,246,247,
895    248,249,250,251,252,253,254,255,
896    0,1,2,3,4,5,6,7,
897    8,9,10,11,12,13,14,15,
898    16,17,18,19,20,21,22,23,
899    24,25,26,27,28,29,30,31,
900    32,33,34,35,36,37,38,39,
901    40,41,42,43,44,45,46,47,
902    48,49,50,51,52,53,54,55,
903    56,57,58,59,60,61,62,63,
904    64,97,98,99,100,101,102,103,
905    104,105,106,107,108,109,110,111,
906    112,113,114,115,116,117,118,119,
907    120,121,122,91,92,93,94,95,
908    96,65,66,67,68,69,70,71,
909    72,73,74,75,76,77,78,79,
910    80,81,82,83,84,85,86,87,
911    88,89,90,123,124,125,126,127,
912    128,129,130,131,132,133,134,135,
913    136,137,138,139,140,141,142,143,
914    144,145,146,147,148,149,150,151,
915    152,153,154,155,156,157,158,159,
916    160,161,162,163,164,165,166,167,
917    168,169,170,171,172,173,174,175,
918    176,177,178,179,180,181,182,183,
919    184,185,186,187,188,189,190,191,
920    224,225,226,227,228,229,230,231,
921    232,233,234,235,236,237,238,239,
922    240,241,242,243,244,245,246,215,
923    248,249,250,251,252,253,254,223,
924    192,193,194,195,196,197,198,199,
925    200,201,202,203,204,205,206,207,
926    208,209,210,211,212,213,214,247,
927    216,217,218,219,220,221,222,255,
928    0,62,0,0,1,0,0,0,
929    0,0,0,0,0,0,0,0,
930    32,0,0,0,1,0,0,0,
931    0,0,0,0,0,0,0,0,
932    0,0,0,0,0,0,255,3,
933    126,0,0,0,126,0,0,0,
934    0,0,0,0,0,0,0,0,
935    0,0,0,0,0,0,0,0,
936    0,0,0,0,0,0,255,3,
937    0,0,0,0,0,0,0,0,
938    0,0,0,0,0,0,12,2,
939    0,0,0,0,0,0,0,0,
940    0,0,0,0,0,0,0,0,
941    254,255,255,7,0,0,0,0,
942    0,0,0,0,0,0,0,0,
943    255,255,127,127,0,0,0,0,
944    0,0,0,0,0,0,0,0,
945    0,0,0,0,254,255,255,7,
946    0,0,0,0,0,4,32,4,
947    0,0,0,128,255,255,127,255,
948    0,0,0,0,0,0,255,3,
949    254,255,255,135,254,255,255,7,
950    0,0,0,0,0,4,44,6,
951    255,255,127,255,255,255,127,255,
952    0,0,0,0,254,255,255,255,
953    255,255,255,255,255,255,255,127,
954    0,0,0,0,254,255,255,255,
955    255,255,255,255,255,255,255,255,
956    0,2,0,0,255,255,255,255,
957    255,255,255,255,255,255,255,127,
958    0,0,0,0,255,255,255,255,
959    255,255,255,255,255,255,255,255,
960    0,0,0,0,254,255,0,252,
961    1,0,0,248,1,0,0,120,
962    0,0,0,0,254,255,255,255,
963    0,0,128,0,0,0,128,0,
964    255,255,255,255,0,0,0,0,
965    0,0,0,0,0,0,0,128,
966    255,255,255,255,0,0,0,0,
967    0,0,0,0,0,0,0,0,
968    128,0,0,0,0,0,0,0,
969    0,1,1,0,1,1,0,0,
970    0,0,0,0,0,0,0,0,
971    0,0,0,0,0,0,0,0,
972    1,0,0,0,128,0,0,0,
973    128,128,128,128,0,0,128,0,
974    28,28,28,28,28,28,28,28,
975    28,28,0,0,0,0,0,128,
976    0,26,26,26,26,26,26,18,
977    18,18,18,18,18,18,18,18,
978    18,18,18,18,18,18,18,18,
979    18,18,18,128,128,0,128,16,
980    0,26,26,26,26,26,26,18,
981    18,18,18,18,18,18,18,18,
982    18,18,18,18,18,18,18,18,
983    18,18,18,128,128,0,0,0,
984    0,0,0,0,0,1,0,0,
985    0,0,0,0,0,0,0,0,
986    0,0,0,0,0,0,0,0,
987    0,0,0,0,0,0,0,0,
988    1,0,0,0,0,0,0,0,
989    0,0,18,0,0,0,0,0,
990    0,0,20,20,0,18,0,0,
991    0,20,18,0,0,0,0,0,
992    18,18,18,18,18,18,18,18,
993    18,18,18,18,18,18,18,18,
994    18,18,18,18,18,18,18,0,
995    18,18,18,18,18,18,18,18,
996    18,18,18,18,18,18,18,18,
997    18,18,18,18,18,18,18,18,
998    18,18,18,18,18,18,18,0,
999    18,18,18,18,18,18,18,18
1000    };
1001    
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
1002    
1003  static int  
1004  ord2utf8(int cvalue, unsigned char *buffer)  
1005    #ifndef HAVE_STRERROR
1006    /*************************************************
1007    *     Provide strerror() for non-ANSI libraries  *
1008    *************************************************/
1009    
1010    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1011    in their libraries, but can provide the same facility by this simple
1012    alternative function. */
1013    
1014    extern int   sys_nerr;
1015    extern char *sys_errlist[];
1016    
1017    char *
1018    strerror(int n)
1019  {  {
1020  register int i, j;  if (n < 0 || n >= sys_nerr) return "unknown error number";
1021  for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  return sys_errlist[n];
1022    if (cvalue <= utf8_table1[i]) break;  }
1023  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  #endif /* HAVE_STRERROR */
1024  if (cvalue < 0) return -1;  
1025  *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);  
1026  cvalue >>= 6 - i;  /*************************************************
1027  for (j = 0; j < i; j++)  *         JIT memory callback                    *
1028    {  *************************************************/
1029    *buffer++ = 0x80 | (cvalue & 0x3f);  
1030    cvalue >>= 6;  static pcre_jit_stack* jit_callback(void *arg)
1031    }  {
1032  return i + 1;  return (pcre_jit_stack *)arg;
1033  }  }
1034    
1035    
1036    #if !defined NOUTF || defined SUPPORT_PCRE16
1037  /*************************************************  /*************************************************
1038  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
1039  *************************************************/  *************************************************/
# Line 92  return i + 1; Line 1042  return i + 1;
1042  and returns the value of the character.  and returns the value of the character.
1043    
1044  Argument:  Argument:
1045    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
1046    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
1047    
1048  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
1049             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
1050  */  */
1051    
1052  int  static int
1053  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1054  {  {
1055  int c = *buffer++;  int c = *utf8bytes++;
1056  int d = c;  int d = c;
1057  int i, j, s;  int i, j, s;
1058    
# Line 117  if (i == 0 || i == 6) return 0;        / Line 1067  if (i == 0 || i == 6) return 0;        /
1067    
1068  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
1069    
1070  d = c & utf8_table3[i];  s = 6*i;
1071  s = 6 - i;  d = (c & utf8_table3[i]) << s;
1072    
1073  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
1074    {    {
1075    c = *buffer++;    c = *utf8bytes++;
1076    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
1077      s -= 6;
1078    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
1079    }    }
1080    
1081  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
1082    
1083  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
1084    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
1085  if (j != i) return -(i+1);  if (j != i) return -(i+1);
1086    
# Line 139  if (j != i) return -(i+1); Line 1089  if (j != i) return -(i+1);
1089  *vptr = d;  *vptr = d;
1090  return i+1;  return i+1;
1091  }  }
1092    #endif /* NOUTF || SUPPORT_PCRE16 */
1093    
1094    
1095    
1096    #if !defined NOUTF || defined SUPPORT_PCRE16
1097    /*************************************************
1098    *       Convert character value to UTF-8         *
1099    *************************************************/
1100    
1101    /* This function takes an integer value in the range 0 - 0x7fffffff
1102    and encodes it as a UTF-8 character in 1 to 6 bytes.
1103    
1104    Arguments:
1105      cvalue     the character value
1106      utf8bytes  pointer to buffer for result - at least 6 bytes long
1107    
1108    Returns:     number of bytes placed in the buffer
1109    */
1110    
1111    static int
1112    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1113    {
1114    register int i, j;
1115    for (i = 0; i < utf8_table1_size; i++)
1116      if (cvalue <= utf8_table1[i]) break;
1117    utf8bytes += i;
1118    for (j = i; j > 0; j--)
1119     {
1120     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1121     cvalue >>= 6;
1122     }
1123    *utf8bytes = utf8_table2[i] | cvalue;
1124    return i + 1;
1125    }
1126    #endif /* NOUTF || SUPPORT_PCRE16 */
1127    
1128    
1129    
1130  /* Debugging function to print the internal form of the regex. This is the same  #ifdef SUPPORT_PCRE16
1131  code as contained in pcre.c under the DEBUG macro. */  /*************************************************
1132    *         Convert a string to 16-bit             *
1133    *************************************************/
1134    
1135  static const char *OP_names[] = {  /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1136    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1137    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
1138    "Opt", "^", "$", "Any", "chars", "not",  in UTF-16. Higher values use 4 bytes in UTF-8 and exactly 4 bytes in UTF-16. The
1139    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  result is always left in buffer16.
1140    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
1141    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  Note that this function does not object to surrogate values. This is
1142    "*", "*?", "+", "+?", "?", "??", "{", "{",  deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1143    "class", "Ref", "Recurse",  for the purpose of testing that they are correctly faulted.
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Branumber", "Bra"  
 };  
1144    
1145    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1146    in UTF-8 so that values greater than 255 can be handled.
1147    
1148  static void print_internals(pcre *re)  Arguments:
1149      data       TRUE if converting a data line; FALSE for a regex
1150      p          points to a byte string
1151      utf        true if UTF-8 (to be converted to UTF-16)
1152      len        number of bytes in the string (excluding trailing zero)
1153    
1154    Returns:     number of 16-bit data items used (excluding trailing zero)
1155                 OR -1 if a UTF-8 string is malformed
1156                 OR -2 if a value > 0x10ffff is encountered
1157                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1158    */
1159    
1160    static int
1161    to16(int data, pcre_uint8 *p, int utf, int len)
1162  {  {
1163  unsigned char *code = ((real_pcre *)re)->code;  pcre_uint16 *pp;
1164    
1165    if (buffer16_size < 2*len + 2)
1166      {
1167      if (buffer16 != NULL) free(buffer16);
1168      buffer16_size = 2*len + 2;
1169      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1170      if (buffer16 == NULL)
1171        {
1172        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1173        exit(1);
1174        }
1175      }
1176    
1177    pp = buffer16;
1178    
1179  fprintf(outfile, "------------------------------------------------------------------\n");  if (!utf && !data)
1180      {
1181      while (len-- > 0) *pp++ = *p++;
1182      }
1183    
1184  for(;;)  else
1185    {    {
1186    int c;    int c = 0;
1187    int charlength;    while (len > 0)
1188        {
1189        int chlen = utf82ord(p, &c);
1190        if (chlen <= 0) return -1;
1191        if (c > 0x10ffff) return -2;
1192        p += chlen;
1193        len -= chlen;
1194        if (c < 0x10000) *pp++ = c; else
1195          {
1196          if (!utf) return -3;
1197          c -= 0x10000;
1198          *pp++ = 0xD800 | (c >> 10);
1199          *pp++ = 0xDC00 | (c & 0x3ff);
1200          }
1201        }
1202      }
1203    
1204    *pp = 0;
1205    return pp - buffer16;
1206    }
1207    #endif
1208    
1209    
1210    /*************************************************
1211    *        Read or extend an input line            *
1212    *************************************************/
1213    
1214    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  /* Input lines are read into buffer, but both patterns and data lines can be
1215    continued over multiple input lines. In addition, if the buffer fills up, we
1216    want to automatically expand it so as to be able to handle extremely large
1217    lines that are needed for certain stress tests. When the input buffer is
1218    expanded, the other two buffers must also be expanded likewise, and the
1219    contents of pbuffer, which are a copy of the input for callouts, must be
1220    preserved (for when expansion happens for a data line). This is not the most
1221    optimal way of handling this, but hey, this is just a test program!
1222    
1223    if (*code >= OP_BRA)  Arguments:
1224      f            the file to read
1225      start        where in buffer to start (this *must* be within buffer)
1226      prompt       for stdin or readline()
1227    
1228    Returns:       pointer to the start of new data
1229                   could be a copy of start, or could be moved
1230                   NULL if no data read and EOF reached
1231    */
1232    
1233    static pcre_uint8 *
1234    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1235    {
1236    pcre_uint8 *here = start;
1237    
1238    for (;;)
1239      {
1240      int rlen = (int)(buffer_size - (here - buffer));
1241    
1242      if (rlen > 1000)
1243      {      {
1244      if (*code - OP_BRA > EXTRACT_BASIC_MAX)      int dlen;
1245        fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);  
1246        /* If libreadline support is required, use readline() to read a line if the
1247        input is a terminal. Note that readline() removes the trailing newline, so
1248        we must put it back again, to be compatible with fgets(). */
1249    
1250    #ifdef SUPPORT_LIBREADLINE
1251        if (isatty(fileno(f)))
1252          {
1253          size_t len;
1254          char *s = readline(prompt);
1255          if (s == NULL) return (here == start)? NULL : start;
1256          len = strlen(s);
1257          if (len > 0) add_history(s);
1258          if (len > rlen - 1) len = rlen - 1;
1259          memcpy(here, s, len);
1260          here[len] = '\n';
1261          here[len+1] = 0;
1262          free(s);
1263          }
1264      else      else
1265        fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  #endif
1266      code += 2;  
1267        /* Read the next line by normal means, prompting if the file is stdin. */
1268    
1269          {
1270          if (f == stdin) printf("%s", prompt);
1271          if (fgets((char *)here, rlen,  f) == NULL)
1272            return (here == start)? NULL : start;
1273          }
1274    
1275        dlen = (int)strlen((char *)here);
1276        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1277        here += dlen;
1278      }      }
1279    
1280    else switch(*code)    else
1281      {      {
1282      case OP_END:      int new_buffer_size = 2*buffer_size;
1283      fprintf(outfile, "    %s\n", OP_names[*code]);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1284      fprintf(outfile, "------------------------------------------------------------------\n");      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1285      return;      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1286    
1287      case OP_OPT:      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1288      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);        {
1289      code++;        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1290      break;        exit(1);
1291          }
1292    
1293      case OP_CHARS:      memcpy(new_buffer, buffer, buffer_size);
1294      charlength = *(++code);      memcpy(new_pbuffer, pbuffer, buffer_size);
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
1295    
1296      case OP_KETRMAX:      buffer_size = new_buffer_size;
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
1297    
1298      case OP_STAR:      start = new_buffer + (start - buffer);
1299      case OP_MINSTAR:      here = new_buffer + (here - buffer);
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
1300    
1301      case OP_EXACT:      free(buffer);
1302      case OP_UPTO:      free(dbuffer);
1303      case OP_MINUPTO:      free(pbuffer);
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
1304    
1305      case OP_TYPEEXACT:      buffer = new_buffer;
1306      case OP_TYPEUPTO:      dbuffer = new_dbuffer;
1307      case OP_TYPEMINUPTO:      pbuffer = new_pbuffer;
1308      fprintf(outfile, "    %s{", OP_names[code[3]]);      }
1309      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");    }
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
1310    
1311      case OP_NOT:  return NULL;  /* Control never gets here */
1312      if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  }
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
1313    
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
1314    
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
1315    
1316      case OP_REF:  /*************************************************
1317      fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  *          Read number from string               *
1318      code += 3;  *************************************************/
     goto CLASS_REF_REPEAT;  
1319    
1320      case OP_CLASS:  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1321        {  around with conditional compilation, just do the job by hand. It is only used
1322        int i, min, max;  for unpicking arguments, so just keep it simple.
       code++;  
       fprintf(outfile, "    [");  
1323    
1324        for (i = 0; i < 256; i++)  Arguments:
1325          {    str           string to be converted
1326          if ((code[i/8] & (1 << (i&7))) != 0)    endptr        where to put the end pointer
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
1327    
1328        CLASS_REF_REPEAT:  Returns:        the unsigned long
1329    */
1330    
1331        switch(*code)  static int
1332          {  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1333          case OP_CRSTAR:  {
1334          case OP_CRMINSTAR:  int result = 0;
1335          case OP_CRPLUS:  while(*str != 0 && isspace(*str)) str++;
1336          case OP_CRMINPLUS:  while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1337          case OP_CRQUERY:  *endptr = str;
1338          case OP_CRMINQUERY:  return(result);
1339          fprintf(outfile, "%s", OP_names[*code]);  }
         break;  
1340    
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
1341    
         default:  
         code--;  
         }  
       }  
     break;  
1342    
1343      /* Anything else is just a one-node item */  /*************************************************
1344    *             Print one character                *
1345    *************************************************/
1346    
1347      default:  /* Print a single character either literally, or as a hex escape. */
     fprintf(outfile, "    %s", OP_names[*code]);  
     break;  
     }  
1348    
1349    code++;  static int pchar(int c, FILE *f)
1350    fprintf(outfile, "\n");  {
1351    if (PRINTOK(c))
1352      {
1353      if (f != NULL) fprintf(f, "%c", c);
1354      return 1;
1355      }
1356    
1357    if (c < 0x100)
1358      {
1359      if (use_utf)
1360        {
1361        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1362        return 6;
1363        }
1364      else
1365        {
1366        if (f != NULL) fprintf(f, "\\x%02x", c);
1367        return 4;
1368        }
1369    }    }
1370    
1371    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1372    return (c <= 0x000000ff)? 6 :
1373           (c <= 0x00000fff)? 7 :
1374           (c <= 0x0000ffff)? 8 :
1375           (c <= 0x000fffff)? 9 : 10;
1376  }  }
1377    
1378    
1379    
1380  /* Character string printing function. A "normal" and a UTF-8 version. */  #ifdef SUPPORT_PCRE8
1381    /*************************************************
1382    *         Print 8-bit character string           *
1383    *************************************************/
1384    
1385  static void pchars(unsigned char *p, int length, int utf8)  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1386    If handed a NULL file, just counts chars without printing. */
1387    
1388    static int pchars(pcre_uint8 *p, int length, FILE *f)
1389  {  {
1390  int c;  int c = 0;
1391    int yield = 0;
1392    
1393    if (length < 0)
1394      length = strlen((char *)p);
1395    
1396  while (length-- > 0)  while (length-- > 0)
1397    {    {
1398    if (utf8)  #if !defined NOUTF
1399      if (use_utf)
1400      {      {
1401      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
1402      if (rc > 0)      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1403        {        {
1404        length -= rc - 1;        length -= rc - 1;
1405        p += rc;        p += rc;
1406        if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);        yield += pchar(c, f);
         else fprintf(outfile, "\\x{%02x}", c);  
1407        continue;        continue;
1408        }        }
1409      }      }
1410    #endif
1411     /* Not UTF-8, or malformed UTF-8  */    c = *p++;
1412      yield += pchar(c, f);
   if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
     else fprintf(outfile, "\\x%02x", c);  
1413    }    }
1414    
1415    return yield;
1416  }  }
1417    #endif
1418    
1419    
1420    
1421  /* Alternative malloc function, to test functionality and show the size of the  #ifdef SUPPORT_PCRE16
1422  compiled re. */  /*************************************************
1423    *    Find length of 0-terminated 16-bit string   *
1424    *************************************************/
1425    
1426  static void *new_malloc(size_t size)  static int strlen16(PCRE_SPTR16 p)
1427  {  {
1428  gotten_store = size;  int len = 0;
1429  if (log_store)  while (*p++ != 0) len++;
1430    fprintf(outfile, "Memory allocation (code space): %d\n",  return len;
     (int)((int)size - offsetof(real_pcre, code[0])));  
 return malloc(size);  
1431  }  }
1432    #endif  /* SUPPORT_PCRE16 */
1433    
1434    
1435    #ifdef SUPPORT_PCRE16
1436    /*************************************************
1437    *           Print 16-bit character string        *
1438    *************************************************/
1439    
1440    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1441    If handed a NULL file, just counts chars without printing. */
1442    
1443  /* Get one piece of information from the pcre_fullinfo() function */  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
   
 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  
1444  {  {
1445  int rc;  int yield = 0;
 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
   fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  
 }  
1446    
1447    if (length < 0)
1448      length = strlen16(p);
1449    
1450    while (length-- > 0)
1451      {
1452      int c = *p++ & 0xffff;
1453    #if !defined NOUTF
1454      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1455        {
1456        int d = *p & 0xffff;
1457        if (d >= 0xDC00 && d < 0xDFFF)
1458          {
1459          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1460          length--;
1461          p++;
1462          }
1463        }
1464    #endif
1465      yield += pchar(c, f);
1466      }
1467    
1468    return yield;
1469    }
1470    #endif  /* SUPPORT_PCRE16 */
1471    
1472    
1473    
1474    #ifdef SUPPORT_PCRE8
1475    /*************************************************
1476    *     Read a capture name (8-bit) and check it   *
1477    *************************************************/
1478    
1479    static pcre_uint8 *
1480    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1481    {
1482    pcre_uint8 *npp = *pp;
1483    while (isalnum(*p)) *npp++ = *p++;
1484    *npp++ = 0;
1485    *npp = 0;
1486    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1487      {
1488      fprintf(outfile, "no parentheses with name \"");
1489      PCHARSV(*pp, 0, -1, outfile);
1490      fprintf(outfile, "\"\n");
1491      }
1492    
1493    *pp = npp;
1494    return p;
1495    }
1496    #endif  /* SUPPORT_PCRE8 */
1497    
1498    
1499    
1500    #ifdef SUPPORT_PCRE16
1501    /*************************************************
1502    *     Read a capture name (16-bit) and check it  *
1503    *************************************************/
1504    
1505    /* Note that the text being read is 8-bit. */
1506    
1507    static pcre_uint8 *
1508    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1509    {
1510    pcre_uint16 *npp = *pp;
1511    while (isalnum(*p)) *npp++ = *p++;
1512    *npp++ = 0;
1513    *npp = 0;
1514    if (pcre16_get_stringnumber(re, (PCRE_SPTR16)(*pp)) < 0)
1515      {
1516      fprintf(outfile, "no parentheses with name \"");
1517      PCHARSV(*pp, 0, -1, outfile);
1518      fprintf(outfile, "\"\n");
1519      }
1520    *pp = npp;
1521    return p;
1522    }
1523    #endif  /* SUPPORT_PCRE16 */
1524    
1525    
1526    
1527    /*************************************************
1528    *              Callout function                  *
1529    *************************************************/
1530    
1531    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1532    the match. Yield zero unless more callouts than the fail count, or the callout
1533    data is not zero. */
1534    
1535    static int callout(pcre_callout_block *cb)
1536    {
1537    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1538    int i, pre_start, post_start, subject_length;
1539    
1540    if (callout_extra)
1541      {
1542      fprintf(f, "Callout %d: last capture = %d\n",
1543        cb->callout_number, cb->capture_last);
1544    
1545      for (i = 0; i < cb->capture_top * 2; i += 2)
1546        {
1547        if (cb->offset_vector[i] < 0)
1548          fprintf(f, "%2d: <unset>\n", i/2);
1549        else
1550          {
1551          fprintf(f, "%2d: ", i/2);
1552          PCHARSV(cb->subject, cb->offset_vector[i],
1553            cb->offset_vector[i+1] - cb->offset_vector[i], f);
1554          fprintf(f, "\n");
1555          }
1556        }
1557      }
1558    
1559    /* Re-print the subject in canonical form, the first time or if giving full
1560    details. On subsequent calls in the same match, we use pchars just to find the
1561    printed lengths of the substrings. */
1562    
1563    if (f != NULL) fprintf(f, "--->");
1564    
1565    PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1566    PCHARS(post_start, cb->subject, cb->start_match,
1567      cb->current_position - cb->start_match, f);
1568    
1569    PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1570    
1571    PCHARSV(cb->subject, cb->current_position,
1572      cb->subject_length - cb->current_position, f);
1573    
1574    if (f != NULL) fprintf(f, "\n");
1575    
1576    /* Always print appropriate indicators, with callout number if not already
1577    shown. For automatic callouts, show the pattern offset. */
1578    
1579    if (cb->callout_number == 255)
1580      {
1581      fprintf(outfile, "%+3d ", cb->pattern_position);
1582      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1583      }
1584    else
1585      {
1586      if (callout_extra) fprintf(outfile, "    ");
1587        else fprintf(outfile, "%3d ", cb->callout_number);
1588      }
1589    
1590    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1591    fprintf(outfile, "^");
1592    
1593    if (post_start > 0)
1594      {
1595      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1596      fprintf(outfile, "^");
1597      }
1598    
1599    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1600      fprintf(outfile, " ");
1601    
1602    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1603      pbuffer + cb->pattern_position);
1604    
1605    fprintf(outfile, "\n");
1606    first_callout = 0;
1607    
1608    if (cb->mark != last_callout_mark)
1609      {
1610      if (cb->mark == NULL)
1611        fprintf(outfile, "Latest Mark: <unset>\n");
1612      else
1613        {
1614        fprintf(outfile, "Latest Mark: ");
1615        PCHARSV(cb->mark, 0, -1, outfile);
1616        putc('\n', outfile);
1617        }
1618      last_callout_mark = cb->mark;
1619      }
1620    
1621    if (cb->callout_data != NULL)
1622      {
1623      int callout_data = *((int *)(cb->callout_data));
1624      if (callout_data != 0)
1625        {
1626        fprintf(outfile, "Callout data = %d\n", callout_data);
1627        return callout_data;
1628        }
1629      }
1630    
1631    return (cb->callout_number != callout_fail_id)? 0 :
1632           (++callout_count >= callout_fail_count)? 1 : 0;
1633    }
1634    
1635    
1636    /*************************************************
1637    *            Local malloc functions              *
1638    *************************************************/
1639    
1640    /* Alternative malloc function, to test functionality and save the size of a
1641    compiled re, which is the first store request that pcre_compile() makes. The
1642    show_malloc variable is set only during matching. */
1643    
1644    static void *new_malloc(size_t size)
1645    {
1646    void *block = malloc(size);
1647    gotten_store = size;
1648    if (first_gotten_store == 0) first_gotten_store = size;
1649    if (show_malloc)
1650      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1651    return block;
1652    }
1653    
1654    static void new_free(void *block)
1655    {
1656    if (show_malloc)
1657      fprintf(outfile, "free             %p\n", block);
1658    free(block);
1659    }
1660    
1661    /* For recursion malloc/free, to test stacking calls */
1662    
1663    static void *stack_malloc(size_t size)
1664    {
1665    void *block = malloc(size);
1666    if (show_malloc)
1667      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1668    return block;
1669    }
1670    
1671    static void stack_free(void *block)
1672    {
1673    if (show_malloc)
1674      fprintf(outfile, "stack_free       %p\n", block);
1675    free(block);
1676    }
1677    
1678    
1679    /*************************************************
1680    *          Call pcre_fullinfo()                  *
1681    *************************************************/
1682    
1683    /* Get one piece of information from the pcre_fullinfo() function. When only
1684    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1685    value, but the code is defensive.
1686    
1687    Arguments:
1688      re        compiled regex
1689      study     study data
1690      option    PCRE_INFO_xxx option
1691      ptr       where to put the data
1692    
1693    Returns:    0 when OK, < 0 on error
1694    */
1695    
1696    static int
1697    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1698    {
1699    int rc;
1700    
1701    if (use_pcre16)
1702    #ifdef SUPPORT_PCRE16
1703      rc = pcre16_fullinfo(re, study, option, ptr);
1704    #else
1705      rc = PCRE_ERROR_BADMODE;
1706    #endif
1707    else
1708    #ifdef SUPPORT_PCRE8
1709      rc = pcre_fullinfo(re, study, option, ptr);
1710    #else
1711      rc = PCRE_ERROR_BADMODE;
1712    #endif
1713    
1714    if (rc < 0)
1715      {
1716      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1717        use_pcre16? "16" : "", option);
1718      if (rc == PCRE_ERROR_BADMODE)
1719        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1720          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1721      }
1722    
1723    return rc;
1724    }
1725    
1726    
1727    
1728    /*************************************************
1729    *             Swap byte functions                *
1730    *************************************************/
1731    
1732    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1733    value, respectively.
1734    
1735    Arguments:
1736      value        any number
1737    
1738    Returns:       the byte swapped value
1739    */
1740    
1741    static pcre_uint32
1742    swap_uint32(pcre_uint32 value)
1743    {
1744    return ((value & 0x000000ff) << 24) |
1745           ((value & 0x0000ff00) <<  8) |
1746           ((value & 0x00ff0000) >>  8) |
1747           (value >> 24);
1748    }
1749    
1750    static pcre_uint16
1751    swap_uint16(pcre_uint16 value)
1752    {
1753    return (value >> 8) | (value << 8);
1754    }
1755    
1756    
1757    
1758    /*************************************************
1759    *        Flip bytes in a compiled pattern        *
1760    *************************************************/
1761    
1762    /* This function is called if the 'F' option was present on a pattern that is
1763    to be written to a file. We flip the bytes of all the integer fields in the
1764    regex data block and the study block. In 16-bit mode this also flips relevant
1765    bytes in the pattern itself. This is to make it possible to test PCRE's
1766    ability to reload byte-flipped patterns, e.g. those compiled on a different
1767    architecture. */
1768    
1769    static void
1770    regexflip(pcre *ere, pcre_extra *extra)
1771    {
1772    real_pcre *re = (real_pcre *)ere;
1773    #ifdef SUPPORT_PCRE16
1774    int op;
1775    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1776    int length = re->name_count * re->name_entry_size;
1777    #ifdef SUPPORT_UTF
1778    BOOL utf = (re->options & PCRE_UTF16) != 0;
1779    BOOL utf16_char = FALSE;
1780    #endif /* SUPPORT_UTF */
1781    #endif /* SUPPORT_PCRE16 */
1782    
1783    /* Always flip the bytes in the main data block and study blocks. */
1784    
1785    re->magic_number = REVERSED_MAGIC_NUMBER;
1786    re->size = swap_uint32(re->size);
1787    re->options = swap_uint32(re->options);
1788    re->flags = swap_uint16(re->flags);
1789    re->top_bracket = swap_uint16(re->top_bracket);
1790    re->top_backref = swap_uint16(re->top_backref);
1791    re->first_char = swap_uint16(re->first_char);
1792    re->req_char = swap_uint16(re->req_char);
1793    re->name_table_offset = swap_uint16(re->name_table_offset);
1794    re->name_entry_size = swap_uint16(re->name_entry_size);
1795    re->name_count = swap_uint16(re->name_count);
1796    
1797    if (extra != NULL)
1798      {
1799      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1800      rsd->size = swap_uint32(rsd->size);
1801      rsd->flags = swap_uint32(rsd->flags);
1802      rsd->minlength = swap_uint32(rsd->minlength);
1803      }
1804    
1805    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1806    in the name table, if present, and then in the pattern itself. */
1807    
1808    #ifdef SUPPORT_PCRE16
1809    if (!use_pcre16) return;
1810    
1811    while(TRUE)
1812      {
1813      /* Swap previous characters. */
1814      while (length-- > 0)
1815        {
1816        *ptr = swap_uint16(*ptr);
1817        ptr++;
1818        }
1819    #ifdef SUPPORT_UTF
1820      if (utf16_char)
1821        {
1822        if ((ptr[-1] & 0xfc00) == 0xd800)
1823          {
1824          /* We know that there is only one extra character in UTF-16. */
1825          *ptr = swap_uint16(*ptr);
1826          ptr++;
1827          }
1828        }
1829      utf16_char = FALSE;
1830    #endif /* SUPPORT_UTF */
1831    
1832      /* Get next opcode. */
1833    
1834      length = 0;
1835      op = *ptr;
1836      *ptr++ = swap_uint16(op);
1837    
1838      switch (op)
1839        {
1840        case OP_END:
1841        return;
1842    
1843    #ifdef SUPPORT_UTF
1844        case OP_CHAR:
1845        case OP_CHARI:
1846        case OP_NOT:
1847        case OP_NOTI:
1848        case OP_STAR:
1849        case OP_MINSTAR:
1850        case OP_PLUS:
1851        case OP_MINPLUS:
1852        case OP_QUERY:
1853        case OP_MINQUERY:
1854        case OP_UPTO:
1855        case OP_MINUPTO:
1856        case OP_EXACT:
1857        case OP_POSSTAR:
1858        case OP_POSPLUS:
1859        case OP_POSQUERY:
1860        case OP_POSUPTO:
1861        case OP_STARI:
1862        case OP_MINSTARI:
1863        case OP_PLUSI:
1864        case OP_MINPLUSI:
1865        case OP_QUERYI:
1866        case OP_MINQUERYI:
1867        case OP_UPTOI:
1868        case OP_MINUPTOI:
1869        case OP_EXACTI:
1870        case OP_POSSTARI:
1871        case OP_POSPLUSI:
1872        case OP_POSQUERYI:
1873        case OP_POSUPTOI:
1874        case OP_NOTSTAR:
1875        case OP_NOTMINSTAR:
1876        case OP_NOTPLUS:
1877        case OP_NOTMINPLUS:
1878        case OP_NOTQUERY:
1879        case OP_NOTMINQUERY:
1880        case OP_NOTUPTO:
1881        case OP_NOTMINUPTO:
1882        case OP_NOTEXACT:
1883        case OP_NOTPOSSTAR:
1884        case OP_NOTPOSPLUS:
1885        case OP_NOTPOSQUERY:
1886        case OP_NOTPOSUPTO:
1887        case OP_NOTSTARI:
1888        case OP_NOTMINSTARI:
1889        case OP_NOTPLUSI:
1890        case OP_NOTMINPLUSI:
1891        case OP_NOTQUERYI:
1892        case OP_NOTMINQUERYI:
1893        case OP_NOTUPTOI:
1894        case OP_NOTMINUPTOI:
1895        case OP_NOTEXACTI:
1896        case OP_NOTPOSSTARI:
1897        case OP_NOTPOSPLUSI:
1898        case OP_NOTPOSQUERYI:
1899        case OP_NOTPOSUPTOI:
1900        if (utf) utf16_char = TRUE;
1901    #endif
1902        /* Fall through. */
1903    
1904        default:
1905        length = OP_lengths16[op] - 1;
1906        break;
1907    
1908        case OP_CLASS:
1909        case OP_NCLASS:
1910        /* Skip the character bit map. */
1911        ptr += 32/sizeof(pcre_uint16);
1912        length = 0;
1913        break;
1914    
1915        case OP_XCLASS:
1916        /* Reverse the size of the XCLASS instance. */
1917        ptr++;
1918        *ptr = swap_uint16(*ptr);
1919        if (LINK_SIZE > 1)
1920          {
1921          /* LINK_SIZE can be 1 or 2 in 16-bit mode. */
1922          ptr++;
1923          *ptr = swap_uint16(*ptr);
1924          }
1925        ptr++;
1926    
1927        if (LINK_SIZE > 1)
1928          length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1929            (1 + LINK_SIZE + 1);
1930        else
1931          length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1932    
1933        op = *ptr;
1934        *ptr = swap_uint16(op);
1935        if ((op & XCL_MAP) != 0)
1936          {
1937          /* Skip the character bit map. */
1938          ptr += 32/sizeof(pcre_uint16);
1939          length -= 32/sizeof(pcre_uint16);
1940          }
1941        break;
1942        }
1943      }
1944    /* Control should never reach here in 16-bit mode. */
1945    #endif /* SUPPORT_PCRE16 */
1946    }
1947    
1948    
1949    
1950    /*************************************************
1951    *        Check match or recursion limit          *
1952    *************************************************/
1953    
1954    static int
1955    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1956      int start_offset, int options, int *use_offsets, int use_size_offsets,
1957      int flag, unsigned long int *limit, int errnumber, const char *msg)
1958    {
1959    int count;
1960    int min = 0;
1961    int mid = 64;
1962    int max = -1;
1963    
1964    extra->flags |= flag;
1965    
1966    for (;;)
1967      {
1968      *limit = mid;
1969    
1970      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1971        use_offsets, use_size_offsets);
1972    
1973      if (count == errnumber)
1974        {
1975        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1976        min = mid;
1977        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1978        }
1979    
1980      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1981                             count == PCRE_ERROR_PARTIAL)
1982        {
1983        if (mid == min + 1)
1984          {
1985          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1986          break;
1987          }
1988        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1989        max = mid;
1990        mid = (min + mid)/2;
1991        }
1992      else break;    /* Some other error */
1993      }
1994    
1995    extra->flags &= ~flag;
1996    return count;
1997    }
1998    
1999    
2000    
2001    /*************************************************
2002    *         Case-independent strncmp() function    *
2003    *************************************************/
2004    
2005    /*
2006    Arguments:
2007      s         first string
2008      t         second string
2009      n         number of characters to compare
2010    
2011    Returns:    < 0, = 0, or > 0, according to the comparison
2012    */
2013    
2014    static int
2015    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2016    {
2017    while (n--)
2018      {
2019      int c = tolower(*s++) - tolower(*t++);
2020      if (c) return c;
2021      }
2022    return 0;
2023    }
2024    
2025    
2026    
2027    /*************************************************
2028    *         Check newline indicator                *
2029    *************************************************/
2030    
2031    /* This is used both at compile and run-time to check for <xxx> escapes. Print
2032    a message and return 0 if there is no match.
2033    
2034    Arguments:
2035      p           points after the leading '<'
2036      f           file for error message
2037    
2038    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2039    */
2040    
2041    static int
2042    check_newline(pcre_uint8 *p, FILE *f)
2043    {
2044    if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2045    if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2046    if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2047    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2048    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2049    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2050    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2051    fprintf(f, "Unknown newline type at: <%s\n", p);
2052    return 0;
2053    }
2054    
2055    
2056    
2057    /*************************************************
2058    *             Usage function                     *
2059    *************************************************/
2060    
2061    static void
2062    usage(void)
2063    {
2064    printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2065    printf("Input and output default to stdin and stdout.\n");
2066    #ifdef SUPPORT_LIBREADLINE
2067    printf("If input is a terminal, readline() is used to read from it.\n");
2068    #else
2069    printf("This version of pcretest is not linked with readline().\n");
2070    #endif
2071    printf("\nOptions:\n");
2072    #ifdef SUPPORT_PCRE16
2073    printf("  -16      use 16-bit interface\n");
2074    #endif
2075    printf("  -b       show compiled code (bytecode)\n");
2076    printf("  -C       show PCRE compile-time options and exit\n");
2077    printf("  -C arg   show a specific compile-time option\n");
2078    printf("           and exit with its value. The arg can be:\n");
2079    printf("     linksize     internal link size [2, 3, 4]\n");
2080    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2081    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2082    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2083    printf("     ucp          Unicode Properties supported [0, 1]\n");
2084    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2085    printf("  -d       debug: show compiled code and information (-b and -i)\n");
2086    #if !defined NODFA
2087    printf("  -dfa     force DFA matching for all subjects\n");
2088    #endif
2089    printf("  -help    show usage information\n");
2090    printf("  -i       show information about compiled patterns\n"
2091           "  -M       find MATCH_LIMIT minimum for each subject\n"
2092           "  -m       output memory used information\n"
2093           "  -o <n>   set size of offsets vector to <n>\n");
2094    #if !defined NOPOSIX
2095    printf("  -p       use POSIX interface\n");
2096    #endif
2097    printf("  -q       quiet: do not output PCRE version number at start\n");
2098    printf("  -S <n>   set stack size to <n> megabytes\n");
2099    printf("  -s       force each pattern to be studied at basic level\n"
2100           "  -s+      force each pattern to be studied, using JIT if available\n"
2101           "  -t       time compilation and execution\n");
2102    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2103    printf("  -tm      time execution (matching) only\n");
2104    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
2105    }
2106    
2107    
2108    
2109    /*************************************************
2110    *                Main Program                    *
2111    *************************************************/
2112    
2113  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
2114  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 427  options, followed by a set of test data, Line 2117  options, followed by a set of test data,
2117  int main(int argc, char **argv)  int main(int argc, char **argv)
2118  {  {
2119  FILE *infile = stdin;  FILE *infile = stdin;
2120    const char *version;
2121  int options = 0;  int options = 0;
2122  int study_options = 0;  int study_options = 0;
2123    int default_find_match_limit = FALSE;
2124  int op = 1;  int op = 1;
2125  int timeit = 0;  int timeit = 0;
2126    int timeitm = 0;
2127  int showinfo = 0;  int showinfo = 0;
2128  int showstore = 0;  int showstore = 0;
2129    int force_study = -1;
2130    int force_study_options = 0;
2131    int quiet = 0;
2132  int size_offsets = 45;  int size_offsets = 45;
2133  int size_offsets_max;  int size_offsets_max;
2134  int *offsets;  int *offsets = NULL;
2135  #if !defined NOPOSIX  #if !defined NOPOSIX
2136  int posix = 0;  int posix = 0;
2137  #endif  #endif
2138  int debug = 0;  int debug = 0;
2139  int done = 0;  int done = 0;
2140  unsigned char buffer[30000];  int all_use_dfa = 0;
2141  unsigned char dbuffer[1024];  int yield = 0;
2142    int stack_size;
2143    
2144    pcre_jit_stack *jit_stack = NULL;
2145    
2146    /* These vectors store, end-to-end, a list of zero-terminated captured
2147    substring names, each list itself being terminated by an empty name. Assume
2148    that 1024 is plenty long enough for the few names we'll be testing. It is
2149    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2150    for the actual memory, to ensure alignment. By defining these variables always
2151    (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2152    #ifdefs in the code. */
2153    
2154    pcre_uint16 copynames[1024];
2155    pcre_uint16 getnames[1024];
2156    
2157    pcre_uint16 *cn16ptr;
2158    pcre_uint16 *gn16ptr;
2159    
2160    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2161    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2162    pcre_uint8 *cn8ptr;
2163    pcre_uint8 *gn8ptr;
2164    
2165    /* Get buffers from malloc() so that valgrind will check their misuse when
2166    debugging. They grow automatically when very long lines are read. The 16-bit
2167    buffer (buffer16) is obtained only if needed. */
2168    
2169    buffer = (pcre_uint8 *)malloc(buffer_size);
2170    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2171    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2172    
2173  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2174    
2175  outfile = stdout;  outfile = stdout;
2176    
2177  /* Scan options */  /* The following  _setmode() stuff is some Windows magic that tells its runtime
2178    library to translate CRLF into a single LF character. At least, that's what
2179    I've been told: never having used Windows I take this all on trust. Originally
2180    it set 0x8000, but then I was advised that _O_BINARY was better. */
2181    
2182    #if defined(_WIN32) || defined(WIN32)
2183    _setmode( _fileno( stdout ), _O_BINARY );
2184    #endif
2185    
2186    /* Get the version number: both pcre_version() and pcre16_version() give the
2187    same answer. We just need to ensure that we call one that is available. */
2188    
2189    #ifdef SUPPORT_PCRE8
2190    version = pcre_version();
2191    #else
2192    version = pcre16_version();
2193    #endif
2194    
2195    /* Scan options */
2196    
2197    while (argc > 1 && argv[op][0] == '-')
2198      {
2199      pcre_uint8 *endptr;
2200    
2201      if (strcmp(argv[op], "-m") == 0) showstore = 1;
2202      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2203      else if (strcmp(argv[op], "-s+") == 0)
2204        {
2205        force_study = 1;
2206        force_study_options = PCRE_STUDY_JIT_COMPILE;
2207        }
2208      else if (strcmp(argv[op], "-16") == 0)
2209        {
2210    #ifdef SUPPORT_PCRE16
2211        use_pcre16 = 1;
2212    #else
2213        printf("** This version of PCRE was built without 16-bit support\n");
2214        exit(1);
2215    #endif
2216        }
2217      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2218      else if (strcmp(argv[op], "-b") == 0) debug = 1;
2219      else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2220      else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2221      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2222    #if !defined NODFA
2223      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2224    #endif
2225      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2226          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2227            *endptr == 0))
2228        {
2229        op++;
2230        argc--;
2231        }
2232      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2233        {
2234        int both = argv[op][2] == 0;
2235        int temp;
2236        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2237                         *endptr == 0))
2238          {
2239          timeitm = temp;
2240          op++;
2241          argc--;
2242          }
2243        else timeitm = LOOPREPEAT;
2244        if (both) timeit = timeitm;
2245        }
2246      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2247          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2248            *endptr == 0))
2249        {
2250    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2251        printf("PCRE: -S not supported on this OS\n");
2252        exit(1);
2253    #else
2254        int rc;
2255        struct rlimit rlim;
2256        getrlimit(RLIMIT_STACK, &rlim);
2257        rlim.rlim_cur = stack_size * 1024 * 1024;
2258        rc = setrlimit(RLIMIT_STACK, &rlim);
2259        if (rc != 0)
2260          {
2261        printf("PCRE: setrlimit() failed with error %d\n", rc);
2262        exit(1);
2263          }
2264        op++;
2265        argc--;
2266    #endif
2267        }
2268    #if !defined NOPOSIX
2269      else if (strcmp(argv[op], "-p") == 0) posix = 1;
2270    #endif
2271      else if (strcmp(argv[op], "-C") == 0)
2272        {
2273        int rc;
2274        unsigned long int lrc;
2275    
2276        if (argc > 2)
2277          {
2278          if (strcmp(argv[op + 1], "linksize") == 0)
2279            {
2280            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2281            printf("%d\n", rc);
2282            yield = rc;
2283            goto EXIT;
2284            }
2285          if (strcmp(argv[op + 1], "pcre8") == 0)
2286            {
2287    #ifdef SUPPORT_PCRE8
2288            printf("1\n");
2289            yield = 1;
2290    #else
2291            printf("0\n");
2292            yield = 0;
2293    #endif
2294            goto EXIT;
2295            }
2296          if (strcmp(argv[op + 1], "pcre16") == 0)
2297            {
2298    #ifdef SUPPORT_PCRE16
2299            printf("1\n");
2300            yield = 1;
2301    #else
2302            printf("0\n");
2303            yield = 0;
2304    #endif
2305            goto EXIT;
2306            }
2307          if (strcmp(argv[op + 1], "utf") == 0)
2308            {
2309    #ifdef SUPPORT_PCRE8
2310            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2311            printf("%d\n", rc);
2312            yield = rc;
2313    #else
2314            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2315            printf("%d\n", rc);
2316            yield = rc;
2317    #endif
2318            goto EXIT;
2319            }
2320          if (strcmp(argv[op + 1], "ucp") == 0)
2321            {
2322            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2323            printf("%d\n", rc);
2324            yield = rc;
2325            goto EXIT;
2326            }
2327          if (strcmp(argv[op + 1], "jit") == 0)
2328            {
2329            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2330            printf("%d\n", rc);
2331            yield = rc;
2332            goto EXIT;
2333            }
2334          printf("Unknown option: %s\n", argv[op + 1]);
2335          goto EXIT;
2336          }
2337    
2338        printf("PCRE version %s\n", version);
2339        printf("Compiled with\n");
2340    
2341    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2342    are set, either both UTFs are supported or both are not supported. */
2343    
2344  while (argc > 1 && argv[op][0] == '-')  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2345    {      printf("  8-bit and 16-bit support\n");
2346    char *endptr;      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2347        if (rc)
2348          printf("  UTF-8 and UTF-16 support\n");
2349        else
2350          printf("  No UTF-8 or UTF-16 support\n");
2351    #elif defined SUPPORT_PCRE8
2352        printf("  8-bit support only\n");
2353        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2354        printf("  %sUTF-8 support\n", rc? "" : "No ");
2355    #else
2356        printf("  16-bit support only\n");
2357        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2358        printf("  %sUTF-16 support\n", rc? "" : "No ");
2359    #endif
2360    
2361    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)      (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2362      showstore = 1;      printf("  %sUnicode properties support\n", rc? "" : "No ");
2363    else if (strcmp(argv[op], "-t") == 0) timeit = 1;      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2364    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      if (rc)
2365    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;        printf("  Just-in-time compiler support\n");
2366    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&      else
2367        ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        printf("  No just-in-time compiler support\n");
2368        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2369        /* Note that these values are always the ASCII values, even
2370        in EBCDIC environments. CR is 13 and NL is 10. */
2371        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2372          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2373          (rc == -2)? "ANYCRLF" :
2374          (rc == -1)? "ANY" : "???");
2375        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2376        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2377                                         "all Unicode newlines");
2378        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2379        printf("  Internal link size = %d\n", rc);
2380        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2381        printf("  POSIX malloc threshold = %d\n", rc);
2382        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2383        printf("  Default match limit = %ld\n", lrc);
2384        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2385        printf("  Default recursion depth limit = %ld\n", lrc);
2386        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2387        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
2388        goto EXIT;
2389        }
2390      else if (strcmp(argv[op], "-help") == 0 ||
2391               strcmp(argv[op], "--help") == 0)
2392      {      {
2393      op++;      usage();
2394      argc--;      goto EXIT;
2395      }      }
 #if !defined NOPOSIX  
   else if (strcmp(argv[op], "-p") == 0) posix = 1;  
 #endif  
2396    else    else
2397      {      {
2398      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
2399      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
2400      printf("  -d     debug: show compiled code; implies -i\n"      yield = 1;
2401             "  -i     show information about compiled pattern\n"      goto EXIT;
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
2402      }      }
2403    op++;    op++;
2404    argc--;    argc--;
# Line 489  while (argc > 1 && argv[op][0] == '-') Line 2407  while (argc > 1 && argv[op][0] == '-')
2407  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
2408    
2409  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
2410  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
2411  if (offsets == NULL)  if (offsets == NULL)
2412    {    {
2413    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
2414      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
2415    return 1;    yield = 1;
2416      goto EXIT;
2417    }    }
2418    
2419  /* Sort out the input and output files */  /* Sort out the input and output files */
2420    
2421  if (argc > 1)  if (argc > 1)
2422    {    {
2423    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
2424    if (infile == NULL)    if (infile == NULL)
2425      {      {
2426      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
2427      return 1;      yield = 1;
2428        goto EXIT;
2429      }      }
2430    }    }
2431    
2432  if (argc > 2)  if (argc > 2)
2433    {    {
2434    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
2435    if (outfile == NULL)    if (outfile == NULL)
2436      {      {
2437      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
2438      return 1;      yield = 1;
2439        goto EXIT;
2440      }      }
2441    }    }
2442    
2443  /* Set alternative malloc function */  /* Set alternative malloc function */
2444    
2445    #ifdef SUPPORT_PCRE8
2446  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2447    pcre_free = new_free;
2448    pcre_stack_malloc = stack_malloc;
2449    pcre_stack_free = stack_free;
2450    #endif
2451    
2452  /* Heading line, then prompt for first regex if stdin */  #ifdef SUPPORT_PCRE16
2453    pcre16_malloc = new_malloc;
2454    pcre16_free = new_free;
2455    pcre16_stack_malloc = stack_malloc;
2456    pcre16_stack_free = stack_free;
2457    #endif
2458    
2459    /* Heading line unless quiet, then prompt for first regex if stdin */
2460    
2461  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2462    
2463  /* Main loop */  /* Main loop */
2464    
# Line 540  while (!done) Line 2473  while (!done)
2473  #endif  #endif
2474    
2475    const char *error;    const char *error;
2476    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2477    const unsigned char *tables = NULL;    pcre_uint8 *p, *pp, *ppp;
2478      pcre_uint8 *to_file = NULL;
2479      const pcre_uint8 *tables = NULL;
2480      unsigned long int true_size, true_study_size = 0;
2481      size_t size, regex_gotten_store;
2482      int do_allcaps = 0;
2483      int do_mark = 0;
2484    int do_study = 0;    int do_study = 0;
2485      int no_force_study = 0;
2486    int do_debug = debug;    int do_debug = debug;
2487    int do_G = 0;    int do_G = 0;
2488    int do_g = 0;    int do_g = 0;
2489    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2490    int do_showrest = 0;    int do_showrest = 0;
2491    int utf8 = 0;    int do_showcaprest = 0;
2492    int erroroffset, len, delimiter;    int do_flip = 0;
2493      int erroroffset, len, delimiter, poffset;
2494    
2495    if (infile == stdin) printf("  re> ");    use_utf = 0;
2496    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    debug_lengths = 1;
2497    
2498      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
2499    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2500      fflush(outfile);
2501    
2502    p = buffer;    p = buffer;
2503    while (isspace(*p)) p++;    while (isspace(*p)) p++;
2504    if (*p == 0) continue;    if (*p == 0) continue;
2505    
2506    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
2507    complete, read more. */  
2508      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2509        {
2510        unsigned long int magic, get_options;
2511        pcre_uint8 sbuf[8];
2512        FILE *f;
2513    
2514        p++;
2515        pp = p + (int)strlen((char *)p);
2516        while (isspace(pp[-1])) pp--;
2517        *pp = 0;
2518    
2519        f = fopen((char *)p, "rb");
2520        if (f == NULL)
2521          {
2522          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2523          continue;
2524          }
2525    
2526        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2527    
2528        true_size =
2529          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2530        true_study_size =
2531          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2532    
2533        re = (real_pcre *)new_malloc(true_size);
2534        regex_gotten_store = first_gotten_store;
2535    
2536        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2537    
2538        magic = ((real_pcre *)re)->magic_number;
2539        if (magic != MAGIC_NUMBER)
2540          {
2541          if (swap_uint32(magic) == MAGIC_NUMBER)
2542            {
2543            do_flip = 1;
2544            }
2545          else
2546            {
2547            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2548            fclose(f);
2549            continue;
2550            }
2551          }
2552    
2553        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2554          do_flip? " (byte-inverted)" : "", p);
2555    
2556        /* Now see if there is any following study data. */
2557    
2558        if (true_study_size != 0)
2559          {
2560          pcre_study_data *psd;
2561    
2562          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2563          extra->flags = PCRE_EXTRA_STUDY_DATA;
2564    
2565          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2566          extra->study_data = psd;
2567    
2568          if (fread(psd, 1, true_study_size, f) != true_study_size)
2569            {
2570            FAIL_READ:
2571            fprintf(outfile, "Failed to read data from %s\n", p);
2572            if (extra != NULL)
2573              {
2574              PCRE_FREE_STUDY(extra);
2575              }
2576            if (re != NULL) new_free(re);
2577            fclose(f);
2578            continue;
2579            }
2580          fprintf(outfile, "Study data loaded from %s\n", p);
2581          do_study = 1;     /* To get the data output if requested */
2582          }
2583        else fprintf(outfile, "No study data\n");
2584    
2585        /* Flip the necessary bytes. */
2586        if (do_flip)
2587          {
2588          PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2589          }
2590    
2591        /* Need to know if UTF-8 for printing data strings. */
2592    
2593        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2594        use_utf = (get_options & PCRE_UTF8) != 0;
2595    
2596        fclose(f);
2597        goto SHOW_INFO;
2598        }
2599    
2600      /* In-line pattern (the usual case). Get the delimiter and seek the end of
2601      the pattern; if it isn't complete, read more. */
2602    
2603    delimiter = *p++;    delimiter = *p++;
2604    
2605    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2606      {      {
2607      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2608      goto SKIP_DATA;      goto SKIP_DATA;
2609      }      }
2610    
2611    pp = p;    pp = p;
2612      poffset = (int)(p - buffer);
2613    
2614    for(;;)    for(;;)
2615      {      {
# Line 581  while (!done) Line 2620  while (!done)
2620        pp++;        pp++;
2621        }        }
2622      if (*pp != 0) break;      if (*pp != 0) break;
2623        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
2624        {        {
2625        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2626        done = 1;        done = 1;
# Line 599  while (!done) Line 2629  while (!done)
2629      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2630      }      }
2631    
2632      /* The buffer may have moved while being extended; reset the start of data
2633      pointer to the correct relative point in the buffer. */
2634    
2635      p = buffer + poffset;
2636    
2637    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
2638    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
2639    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
2640    
2641    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
2642    
2643    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
2644      for callouts. */
2645    
2646    *pp++ = 0;    *pp++ = 0;
2647      strcpy((char *)pbuffer, (char *)p);
2648    
2649    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2650    
# Line 619  while (!done) Line 2656  while (!done)
2656      {      {
2657      switch (*pp++)      switch (*pp++)
2658        {        {
2659          case 'f': options |= PCRE_FIRSTLINE; break;
2660        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
2661        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
2662        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
2663        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2664        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2665    
2666        case '+': do_showrest = 1; break;        case '+':
2667          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2668          break;
2669    
2670          case '=': do_allcaps = 1; break;
2671        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2672          case 'B': do_debug = 1; break;
2673          case 'C': options |= PCRE_AUTO_CALLOUT; break;
2674        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
2675        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2676          case 'F': do_flip = 1; break;
2677        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2678        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2679          case 'J': options |= PCRE_DUPNAMES; break;
2680          case 'K': do_mark = 1; break;
2681        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2682          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2683    
2684  #if !defined NOPOSIX  #if !defined NOPOSIX
2685        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2686  #endif  #endif
2687    
2688        case 'S': do_study = 1; break;        case 'S':
2689          if (do_study == 0)
2690            {
2691            do_study = 1;
2692            if (*pp == '+')
2693              {
2694              study_options |= PCRE_STUDY_JIT_COMPILE;
2695              pp++;
2696              }
2697            }
2698          else
2699            {
2700            do_study = 0;
2701            no_force_study = 1;
2702            }
2703          break;
2704    
2705        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2706          case 'W': options |= PCRE_UCP; break;
2707        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2708        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2709          case 'Z': debug_lengths = 0; break;
2710          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2711          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2712    
2713          case 'T':
2714          switch (*pp++)
2715            {
2716            case '0': tables = tables0; break;
2717            case '1': tables = tables1; break;
2718    
2719            case '\r':
2720            case '\n':
2721            case ' ':
2722            case 0:
2723            fprintf(outfile, "** Missing table number after /T\n");
2724            goto SKIP_DATA;
2725    
2726            default:
2727            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2728            goto SKIP_DATA;
2729            }
2730          break;
2731    
2732        case 'L':        case 'L':
2733        ppp = pp;        ppp = pp;
2734        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
2735          /* The 0 (NUL) test is just in case this is an unterminated line. */
2736          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2737        *ppp = 0;        *ppp = 0;
2738        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2739          {          {
2740          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2741          goto SKIP_DATA;          goto SKIP_DATA;
2742          }          }
2743        tables = pcre_maketables();        locale_set = 1;
2744          tables = PCRE_MAKETABLES;
2745        pp = ppp;        pp = ppp;
2746        break;        break;
2747    
2748        case '\n': case ' ': break;        case '>':
2749          to_file = pp;
2750          while (*pp != 0) pp++;
2751          while (isspace(pp[-1])) pp--;
2752          *pp = 0;
2753          break;
2754    
2755          case '<':
2756            {
2757            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2758              {
2759              options |= PCRE_JAVASCRIPT_COMPAT;
2760              pp += 3;
2761              }
2762            else
2763              {
2764              int x = check_newline(pp, outfile);
2765              if (x == 0) goto SKIP_DATA;
2766              options |= x;
2767              while (*pp++ != '>');
2768              }
2769            }
2770          break;
2771    
2772          case '\r':                      /* So that it works in Windows */
2773          case '\n':
2774          case ' ':
2775          break;
2776    
2777        default:        default:
2778        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2779        goto SKIP_DATA;        goto SKIP_DATA;
# Line 664  while (!done) Line 2782  while (!done)
2782    
2783    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2784    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2785    local character tables. */    local character tables. Neither does it have 16-bit support. */
2786    
2787  #if !defined NOPOSIX  #if !defined NOPOSIX
2788    if (posix || do_posix)    if (posix || do_posix)
2789      {      {
2790      int rc;      int rc;
2791      int cflags = 0;      int cflags = 0;
2792    
2793      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2794      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2795        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2796        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2797        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2798        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2799        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2800    
2801        first_gotten_store = 0;
2802      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2803    
2804      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 680  while (!done) Line 2806  while (!done)
2806    
2807      if (rc != 0)      if (rc != 0)
2808        {        {
2809        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2810        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2811        goto SKIP_DATA;        goto SKIP_DATA;
2812        }        }
# Line 692  while (!done) Line 2818  while (!done)
2818  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2819    
2820      {      {
2821      if (timeit)      unsigned long int get_options;
2822    
2823        /* In 16-bit mode, convert the input. */
2824    
2825    #ifdef SUPPORT_PCRE16
2826        if (use_pcre16)
2827          {
2828          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2829            {
2830            case -1:
2831            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2832              "converted to UTF-16\n");
2833            goto SKIP_DATA;
2834    
2835            case -2:
2836            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2837              "cannot be converted to UTF-16\n");
2838            goto SKIP_DATA;
2839    
2840            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2841            fprintf(outfile, "**Failed: character value greater than 0xffff "
2842              "cannot be converted to 16-bit in non-UTF mode\n");
2843            goto SKIP_DATA;
2844    
2845            default:
2846            break;
2847            }
2848          p = (pcre_uint8 *)buffer16;
2849          }
2850    #endif
2851    
2852        /* Compile many times when timing */
2853    
2854        if (timeit > 0)
2855        {        {
2856        register int i;        register int i;
2857        clock_t time_taken;        clock_t time_taken;
2858        clock_t start_time = clock();        clock_t start_time = clock();
2859        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
2860          {          {
2861          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2862          if (re != NULL) free(re);          if (re != NULL) free(re);
2863          }          }
2864        time_taken = clock() - start_time;        time_taken = clock() - start_time;
2865        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
2866          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
2867          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
2868        }        }
2869    
2870      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2871        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2872    
2873      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2874      if non-interactive. */      if non-interactive. */
# Line 721  while (!done) Line 2881  while (!done)
2881          {          {
2882          for (;;)          for (;;)
2883            {            {
2884            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2885              {              {
2886              done = 1;              done = 1;
2887              goto CONTINUE;              goto CONTINUE;
# Line 735  while (!done) Line 2895  while (!done)
2895        goto CONTINUE;        goto CONTINUE;
2896        }        }
2897    
2898      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2899      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
2900      returns only limited data. Check that it agrees with the newer one. */      lines. */
2901    
2902      if (do_showinfo)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2903          goto SKIP_DATA;
2904        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2905    
2906        /* Extract the size for possible writing before possibly flipping it,
2907        and remember the store that was got. */
2908    
2909        true_size = ((real_pcre *)re)->size;
2910        regex_gotten_store = first_gotten_store;
2911    
2912        /* Output code size information if requested */
2913    
2914        if (log_store)
2915          fprintf(outfile, "Memory allocation (code space): %d\n",
2916            (int)(first_gotten_store -
2917                  sizeof(real_pcre) -
2918                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2919    
2920        /* If -s or /S was present, study the regex to generate additional info to
2921        help with the matching, unless the pattern has the SS option, which
2922        suppresses the effect of /S (used for a few test patterns where studying is
2923        never sensible). */
2924    
2925        if (do_study || (force_study >= 0 && !no_force_study))
2926        {        {
2927        unsigned long int get_options;        if (timeit > 0)
2928        int old_first_char, old_options, old_count;          {
2929        int count, backrefmax, first_char, need_char;          register int i;
2930        size_t size;          clock_t time_taken;
2931            clock_t start_time = clock();
2932        if (do_debug) print_internals(re);          for (i = 0; i < timeit; i++)
2933              {
2934        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);            PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2935        new_info(re, NULL, PCRE_INFO_SIZE, &size);            }
2936        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);          time_taken = clock() - start_time;
2937        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);          if (extra != NULL)
2938        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);            {
2939        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            PCRE_FREE_STUDY(extra);
2940              }
2941        old_count = pcre_info(re, &old_options, &old_first_char);          fprintf(outfile, "  Study time %.4f milliseconds\n",
2942        if (count < 0) fprintf(outfile,            (((double)time_taken * 1000.0) / (double)timeit) /
2943          "Error %d from pcre_info()\n", count);              (double)CLOCKS_PER_SEC);
2944        else          }
2945          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2946          if (error != NULL)
2947            fprintf(outfile, "Failed to study: %s\n", error);
2948          else if (extra != NULL)
2949          {          {
2950          if (old_count != count) fprintf(outfile,          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2951            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,          if (log_store)
2952              old_count);            {
2953              size_t jitsize;
2954              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
2955                  jitsize != 0)
2956                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2957              }
2958            }
2959          }
2960    
2961          if (old_first_char != first_char) fprintf(outfile,      /* If /K was present, we set up for handling MARK data. */
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
2962    
2963          if (old_options != (int)get_options) fprintf(outfile,      if (do_mark)
2964            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",        {
2965              get_options, old_options);        if (extra == NULL)
2966            {
2967            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2968            extra->flags = 0;
2969          }          }
2970          extra->mark = &markptr;
2971          extra->flags |= PCRE_EXTRA_MARK;
2972          }
2973    
2974        /* Extract and display information from the compiled data if required. */
2975    
2976        SHOW_INFO:
2977    
2978        if (do_debug)
2979          {
2980          fprintf(outfile, "------------------------------------------------------------------\n");
2981          PCRE_PRINTINT(re, outfile, debug_lengths);
2982          }
2983    
2984        /* We already have the options in get_options (see above) */
2985    
2986        if (do_showinfo)
2987          {
2988          unsigned long int all_options;
2989          int count, backrefmax, first_char, need_char, okpartial, jchanged,
2990            hascrorlf;
2991          int nameentrysize, namecount;
2992          const pcre_uint8 *nametable;
2993    
2994          if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
2995              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
2996              new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
2997              new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
2998              new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
2999              new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3000              new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3001              new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3002              new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3003              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3004              new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3005              != 0)
3006            goto SKIP_DATA;
3007    
3008        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3009          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3010          size, gotten_store);          (int)size, (int)regex_gotten_store);
3011    
3012        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
3013        if (backrefmax > 0)        if (backrefmax > 0)
3014          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
3015    
3016          if (namecount > 0)
3017            {
3018            fprintf(outfile, "Named capturing subpatterns:\n");
3019            while (namecount-- > 0)
3020              {
3021    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3022              int imm2_size = use_pcre16 ? 1 : 2;
3023    #else
3024              int imm2_size = IMM2_SIZE;
3025    #endif
3026              int length = (int)STRLEN(nametable + imm2_size);
3027              fprintf(outfile, "  ");
3028              PCHARSV(nametable, imm2_size, length, outfile);
3029              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3030    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3031              fprintf(outfile, "%3d\n", use_pcre16?
3032                 (int)(((PCRE_SPTR16)nametable)[0])
3033                :((int)nametable[0] << 8) | (int)nametable[1]);
3034              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3035    #else
3036              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3037    #ifdef SUPPORT_PCRE8
3038              nametable += nameentrysize;
3039    #else
3040              nametable += nameentrysize * 2;
3041    #endif
3042    #endif
3043              }
3044            }
3045    
3046          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3047          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3048    
3049          all_options = ((real_pcre *)re)->options;
3050          if (do_flip) all_options = swap_uint32(all_options);
3051    
3052        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3053          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3054            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3055            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3056            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3057            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3058              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3059            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3060              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3061              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3062            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3063            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3064            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3065            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3066              ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3067              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3068              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3069              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3070              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3071    
3072          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3073    
3074          switch (get_options & PCRE_NEWLINE_BITS)
3075            {
3076            case PCRE_NEWLINE_CR:
3077            fprintf(outfile, "Forced newline sequence: CR\n");
3078            break;
3079    
3080        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_LF:
3081          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: LF\n");
3082            break;
3083    
3084            case PCRE_NEWLINE_CRLF:
3085            fprintf(outfile, "Forced newline sequence: CRLF\n");
3086            break;
3087    
3088            case PCRE_NEWLINE_ANYCRLF:
3089            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3090            break;
3091    
3092            case PCRE_NEWLINE_ANY:
3093            fprintf(outfile, "Forced newline sequence: ANY\n");
3094            break;
3095    
3096            default:
3097            break;
3098            }
3099    
3100        if (first_char == -1)        if (first_char == -1)
3101          {          {
3102          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
3103          }          }
3104        else if (first_char < 0)        else if (first_char < 0)
3105          {          {
# Line 805  while (!done) Line 3107  while (!done)
3107          }          }
3108        else        else
3109          {          {
3110          if (isprint(first_char))          const char *caseless =
3111            fprintf(outfile, "First char = \'%c\'\n", first_char);            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3112              "" : " (caseless)";
3113    
3114            if (PRINTOK(first_char))
3115              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3116          else          else
3117            fprintf(outfile, "First char = %d\n", first_char);            {
3118              fprintf(outfile, "First char = ");
3119              pchar(first_char, outfile);
3120              fprintf(outfile, "%s\n", caseless);
3121              }
3122          }          }
3123    
3124        if (need_char < 0)        if (need_char < 0)
# Line 817  while (!done) Line 3127  while (!done)
3127          }          }
3128        else        else
3129          {          {
3130          if (isprint(need_char))          const char *caseless =
3131            fprintf(outfile, "Need char = \'%c\'\n", need_char);            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3132              "" : " (caseless)";
3133    
3134            if (PRINTOK(need_char))
3135              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3136            else
3137              {
3138              fprintf(outfile, "Need char = ");
3139              pchar(need_char, outfile);
3140              fprintf(outfile, "%s\n", caseless);
3141              }
3142            }
3143    
3144          /* Don't output study size; at present it is in any case a fixed
3145          value, but it varies, depending on the computer architecture, and
3146          so messes up the test suite. (And with the /F option, it might be
3147          flipped.) If study was forced by an external -s, don't show this
3148          information unless -i or -d was also present. This means that, except
3149          when auto-callouts are involved, the output from runs with and without
3150          -s should be identical. */
3151    
3152          if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3153            {
3154            if (extra == NULL)
3155              fprintf(outfile, "Study returned NULL\n");
3156          else          else
3157            fprintf(outfile, "Need char = %d\n", need_char);            {
3158              pcre_uint8 *start_bits = NULL;
3159              int minlength;
3160    
3161              if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3162                fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3163    
3164              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3165                {
3166                if (start_bits == NULL)
3167                  fprintf(outfile, "No set of starting bytes\n");
3168                else
3169                  {
3170                  int i;
3171                  int c = 24;
3172                  fprintf(outfile, "Starting byte set: ");
3173                  for (i = 0; i < 256; i++)
3174                    {
3175                    if ((start_bits[i/8] & (1<<(i&7))) != 0)
3176                      {
3177                      if (c > 75)
3178                        {
3179                        fprintf(outfile, "\n  ");
3180                        c = 2;
3181                        }
3182                      if (PRINTOK(i) && i != ' ')
3183                        {
3184                        fprintf(outfile, "%c ", i);
3185                        c += 2;
3186                        }
3187                      else
3188                        {
3189                        fprintf(outfile, "\\x%02x ", i);
3190                        c += 5;
3191                        }
3192                      }
3193                    }
3194                  fprintf(outfile, "\n");
3195                  }
3196                }
3197              }
3198    
3199            /* Show this only if the JIT was set by /S, not by -s. */
3200    
3201            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3202              {
3203              int jit;
3204              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3205                {
3206                if (jit)
3207                  fprintf(outfile, "JIT study was successful\n");
3208                else
3209    #ifdef SUPPORT_JIT
3210                  fprintf(outfile, "JIT study was not successful\n");
3211    #else
3212                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3213    #endif
3214                }
3215              }
3216          }          }
3217        }        }
3218    
3219      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
3220      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
3221        the study length, in big-endian order. */
3222    
3223      if (do_study)      if (to_file != NULL)
3224        {        {
3225        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
3226          if (f == NULL)
3227          {          {
3228          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
3229          }          }
3230          else
3231            {
3232            pcre_uint8 sbuf[8];
3233    
3234        extra = pcre_study(re, study_options, &error);          if (do_flip) regexflip(re, extra);
3235        if (error != NULL)          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3236          fprintf(outfile, "Failed to study: %s\n", error);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3237        else if (extra == NULL)          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3238          fprintf(outfile, "Study returned NULL\n");          sbuf[3] = (pcre_uint8)((true_size) & 255);
3239            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3240            sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3241            sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3242            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3243    
3244        else if (do_showinfo)          if (fwrite(sbuf, 1, 8, f) < 8 ||
3245          {              fwrite(re, 1, true_size, f) < true_size)
3246          uschar *start_bits = NULL;            {
3247          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3248          if (start_bits == NULL)            }
           fprintf(outfile, "No starting character set\n");  
3249          else          else
3250            {            {
3251            int i;            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3252            int c = 24;  
3253            fprintf(outfile, "Starting character set: ");            /* If there is study data, write it. */
3254            for (i = 0; i < 256; i++)  
3255              if (extra != NULL)
3256              {              {
3257              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
3258                    true_study_size)
3259                {                {
3260                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
3261                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
3262                }                }
3263                else fprintf(outfile, "Study data written to %s\n", to_file);
3264              }              }
           fprintf(outfile, "\n");  
3265            }            }
3266            fclose(f);
3267          }          }
3268    
3269          new_free(re);
3270          if (extra != NULL)
3271            {
3272            PCRE_FREE_STUDY(extra);
3273            }
3274          if (locale_set)
3275            {
3276            new_free((void *)tables);
3277            setlocale(LC_CTYPE, "C");
3278            locale_set = 0;
3279            }
3280          continue;  /* With next regex */
3281        }        }
3282      }      }        /* End of non-POSIX compile */
3283    
3284    /* Read data lines and test them */    /* Read data lines and test them */
3285    
3286    for (;;)    for (;;)
3287      {      {
3288      unsigned char *q;      pcre_uint8 *q;
3289      unsigned char *bptr = dbuffer;      pcre_uint8 *bptr;
3290      int *use_offsets = offsets;      int *use_offsets = offsets;
3291      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3292        int callout_data = 0;
3293        int callout_data_set = 0;
3294      int count, c;      int count, c;
3295      int copystrings = 0;      int copystrings = 0;
3296        int find_match_limit = default_find_match_limit;
3297      int getstrings = 0;      int getstrings = 0;
3298      int getlist = 0;      int getlist = 0;
3299      int gmatched = 0;      int gmatched = 0;
3300      int start_offset = 0;      int start_offset = 0;
3301        int start_offset_sign = 1;
3302      int g_notempty = 0;      int g_notempty = 0;
3303        int use_dfa = 0;
3304    
3305        *copynames = 0;
3306        *getnames = 0;
3307    
3308        cn16ptr = copynames;
3309        gn16ptr = getnames;
3310        cn8ptr = copynames8;
3311        gn8ptr = getnames8;
3312    
3313        SET_PCRE_CALLOUT(callout);
3314        first_callout = 1;
3315        last_callout_mark = NULL;
3316        callout_extra = 0;
3317        callout_count = 0;
3318        callout_fail_count = 999999;
3319        callout_fail_id = -1;
3320        show_malloc = 0;
3321      options = 0;      options = 0;
3322    
3323      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
3324      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3325    
3326        len = 0;
3327        for (;;)
3328        {        {
3329        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3330        goto CONTINUE;          {
3331            if (len > 0)    /* Reached EOF without hitting a newline */
3332              {
3333              fprintf(outfile, "\n");
3334              break;
3335              }
3336            done = 1;
3337            goto CONTINUE;
3338            }
3339          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3340          len = (int)strlen((char *)buffer);
3341          if (buffer[len-1] == '\n') break;
3342        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
3343    
     len = (int)strlen((char *)buffer);  
3344      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
3345      buffer[len] = 0;      buffer[len] = 0;
3346      if (len == 0) break;      if (len == 0) break;
# Line 921  while (!done) Line 3348  while (!done)
3348      p = buffer;      p = buffer;
3349      while (isspace(*p)) p++;      while (isspace(*p)) p++;
3350    
3351      q = dbuffer;      bptr = q = dbuffer;
3352      while ((c = *p++) != 0)      while ((c = *p++) != 0)
3353        {        {
3354        int i = 0;        int i = 0;
3355        int n = 0;        int n = 0;
3356        if (c == '\\') switch ((c = *p++))  
3357          /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3358          In non-UTF mode, allow the value of the byte to fall through to later,
3359          where values greater than 127 are turned into UTF-8 when running in
3360          16-bit mode. */
3361    
3362          if (c != '\\')
3363            {
3364            if (use_utf)
3365              {
3366              *q++ = c;
3367              continue;
3368              }
3369            }
3370    
3371          /* Handle backslash escapes */
3372    
3373          else switch ((c = *p++))
3374          {          {
3375          case 'a': c =    7; break;          case 'a': c =    7; break;
3376          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 945  while (!done) Line 3389  while (!done)
3389          break;          break;
3390    
3391          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
3392          if (*p == '{')          if (*p == '{')
3393            {            {
3394            unsigned char *pt = p;            pcre_uint8 *pt = p;
3395            c = 0;            c = 0;
3396            while (isxdigit(*(++pt)))  
3397              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3398              when isxdigit() is a macro that refers to its argument more than
3399              once. This is banned by the C Standard, but apparently happens in at
3400              least one MacOS environment. */
3401    
3402              for (pt++; isxdigit(*pt); pt++)
3403                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3404            if (*pt == '}')            if (*pt == '}')
3405              {              {
             unsigned char buffer[8];  
             int ii, utn;  
             utn = ord2utf8(c, buffer);  
             for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];  
             c = buffer[ii];   /* Last byte */  
3406              p = pt + 1;              p = pt + 1;
3407              break;              break;
3408              }              }
3409            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3410            }            }
3411    
3412          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3413            allows UTF-8 characters to be constructed byte by byte, and also allows
3414            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3415            Otherwise, pass it down to later code so that it can be turned into
3416            UTF-8 when running in 16-bit mode. */
3417    
3418          c = 0;          c = 0;
3419          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3420            {            {
3421            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3422            p++;            p++;
3423            }            }
3424            if (use_utf)
3425              {
3426              *q++ = c;
3427              continue;
3428              }
3429          break;          break;
3430    
3431          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
3432          p--;          p--;
3433          continue;          continue;
3434    
3435            case '>':
3436            if (*p == '-')
3437              {
3438              start_offset_sign = -1;
3439              p++;
3440              }
3441            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3442            start_offset *= start_offset_sign;
3443            continue;
3444    
3445          case 'A':  /* Option setting */          case 'A':  /* Option setting */
3446          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
3447          continue;          continue;
# Line 990  while (!done) Line 3451  while (!done)
3451          continue;          continue;
3452    
3453          case 'C':          case 'C':
3454          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
3455          copystrings |= 1 << n;            {
3456              while(isdigit(*p)) n = n * 10 + *p++ - '0';
3457              copystrings |= 1 << n;
3458              }
3459            else if (isalnum(*p))
3460              {
3461              READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3462              }
3463            else if (*p == '+')
3464              {
3465              callout_extra = 1;
3466              p++;
3467              }
3468            else if (*p == '-')
3469              {
3470              SET_PCRE_CALLOUT(NULL);
3471              p++;
3472              }
3473            else if (*p == '!')
3474              {
3475              callout_fail_id = 0;
3476              p++;
3477              while(isdigit(*p))
3478                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3479              callout_fail_count = 0;
3480              if (*p == '!')
3481                {
3482                p++;
3483                while(isdigit(*p))
3484                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3485                }
3486              }
3487            else if (*p == '*')
3488              {
3489              int sign = 1;
3490              callout_data = 0;
3491              if (*(++p) == '-') { sign = -1; p++; }
3492              while(isdigit(*p))
3493                callout_data = callout_data * 10 + *p++ - '0';
3494              callout_data *= sign;
3495              callout_data_set = 1;
3496              }
3497            continue;
3498    
3499    #if !defined NODFA
3500            case 'D':
3501    #if !defined NOPOSIX
3502            if (posix || do_posix)
3503              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3504            else
3505    #endif
3506              use_dfa = 1;