/[pcre2]/code/trunk/src/pcre2test.c
ViewVC logotype

Contents of /code/trunk/src/pcre2test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 692 - (show annotations)
Tue Mar 21 16:18:54 2017 UTC (2 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 258574 byte(s)
Close serialization file in pcre2test after any error.
1 /*************************************************
2 * PCRE2 testing program *
3 *************************************************/
4
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11
12 Written by Philip Hazel
13 Original code Copyright (c) 1997-2012 University of Cambridge
14 Rewritten code Copyright (c) 2016-2017 University of Cambridge
15
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44
45
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80
81 /* Both libreadline and libedit are optionally supported. The user-supplied
82 original patch uses readline/readline.h for libedit, but in at least one system
83 it is installed as editline/readline.h, so the configuration code now looks for
84 that first, falling back to readline/readline.h. */
85
86 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
87 #if defined(SUPPORT_LIBREADLINE)
88 #include <readline/readline.h>
89 #include <readline/history.h>
90 #else
91 #if defined(HAVE_EDITLINE_READLINE_H)
92 #include <editline/readline.h>
93 #else
94 #include <readline/readline.h>
95 #endif
96 #endif
97 #endif
98
99 /* Put the test for interactive input into a macro so that it can be changed if
100 required for different environments. */
101
102 #define INTERACTIVE(f) isatty(fileno(f))
103
104
105 /* ---------------------- System-specific definitions ---------------------- */
106
107 /* A number of things vary for Windows builds. Originally, pcretest opened its
108 input and output without "b"; then I was told that "b" was needed in some
109 environments, so it was added for release 5.0 to both the input and output. (It
110 makes no difference on Unix-like systems.) Later I was told that it is wrong
111 for the input on Windows. I've now abstracted the modes into macros that are
112 set here, to make it easier to fiddle with them, and removed "b" from the input
113 mode under Windows. The BINARY versions are used when saving/restoring compiled
114 patterns. */
115
116 #if defined(_WIN32) || defined(WIN32)
117 #include <io.h> /* For _setmode() */
118 #include <fcntl.h> /* For _O_BINARY */
119 #define INPUT_MODE "r"
120 #define OUTPUT_MODE "wb"
121 #define BINARY_INPUT_MODE "rb"
122 #define BINARY_OUTPUT_MODE "wb"
123
124 #ifndef isatty
125 #define isatty _isatty /* This is what Windows calls them, I'm told, */
126 #endif /* though in some environments they seem to */
127 /* be already defined, hence the #ifndefs. */
128 #ifndef fileno
129 #define fileno _fileno
130 #endif
131
132 /* A user sent this fix for Borland Builder 5 under Windows. */
133
134 #ifdef __BORLANDC__
135 #define _setmode(handle, mode) setmode(handle, mode)
136 #endif
137
138 /* Not Windows */
139
140 #else
141 #include <sys/time.h> /* These two includes are needed */
142 #include <sys/resource.h> /* for setrlimit(). */
143 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
144 #define INPUT_MODE "r"
145 #define OUTPUT_MODE "w"
146 #define BINARY_INPUT_MODE "rb"
147 #define BINARY_OUTPUT_MODE "wb"
148 #else
149 #define INPUT_MODE "rb"
150 #define OUTPUT_MODE "wb"
151 #define BINARY_INPUT_MODE "rb"
152 #define BINARY_OUTPUT_MODE "wb"
153 #endif
154 #endif
155
156 #ifdef __VMS
157 #include <ssdef.h>
158 void vms_setsymbol( char *, char *, int );
159 #endif
160
161 /* VC doesn't support "%td". */
162 #ifdef _MSC_VER
163 #define PTR_SPEC "%lu"
164 #else
165 #define PTR_SPEC "%td"
166 #endif
167
168 /* ------------------End of system-specific definitions -------------------- */
169
170 /* Glueing macros that are used in several places below. */
171
172 #define glue(a,b) a##b
173 #define G(a,b) glue(a,b)
174
175 /* Miscellaneous parameters and manifests */
176
177 #ifndef CLOCKS_PER_SEC
178 #ifdef CLK_TCK
179 #define CLOCKS_PER_SEC CLK_TCK
180 #else
181 #define CLOCKS_PER_SEC 100
182 #endif
183 #endif
184
185 #define CFORE_UNSET UINT32_MAX /* Unset value for cfail/cerror fields */
186 #define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
187 #define DEFAULT_OVECCOUNT 15 /* Default ovector count */
188 #define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
189 #define LOCALESIZE 32 /* Size of locale name */
190 #define LOOPREPEAT 500000 /* Default loop count for timing */
191 #define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */
192 #define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */
193 #define VERSION_SIZE 64 /* Size of buffer for the version strings */
194
195 /* Make sure the buffer into which replacement strings are copied is big enough
196 to hold them as 32-bit code units. */
197
198 #define REPLACE_BUFFSIZE 1024 /* This is a byte value */
199
200 /* Execution modes */
201
202 #define PCRE8_MODE 8
203 #define PCRE16_MODE 16
204 #define PCRE32_MODE 32
205
206 /* Processing returns */
207
208 enum { PR_OK, PR_SKIP, PR_ABEND };
209
210 /* The macro PRINTABLE determines whether to print an output character as-is or
211 as a hex value when showing compiled patterns. We use it in cases when the
212 locale has not been explicitly changed, so as to get consistent output from
213 systems that differ in their output from isprint() even in the "C" locale. */
214
/* PRINTABLE(c) is a fixed, locale-independent range test; the range differs
between EBCDIC and non-EBCDIC builds. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) yields non-zero when c should be output as-is. When use_tables is
non-NULL and c is below 256 the decision is delegated to isprint(); otherwise
the fixed PRINTABLE() range is used. The argument is parenthesized in the range
check so that an expression such as (a | b) is tested as a whole before it can
reach isprint(). Note that c is evaluated more than once, so it must not have
side effects. */

#define PRINTOK(c) \
  ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
222
223 /* We have to include some of the library source files because we need
224 to use some of the macros, internal structure definitions, and other internal
225 values - pcre2test has "inside information" compared to an application program
226 that strictly follows the PCRE2 API.
227
228 Before including pcre2_internal.h we define PRIV so that it does not get
229 defined therein. This ensures that PRIV names in the included files do not
230 clash with those in the libraries. Also, although pcre2_internal.h does itself
231 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
232 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
233 for building the library. */
234
235 #define PRIV(name) name
236 #define PCRE2_CODE_UNIT_WIDTH 0
237 #include "pcre2.h"
238 #include "pcre2posix.h"
239 #include "pcre2_internal.h"
240
241 /* We need access to some of the data tables that PCRE2 uses. Defining
242 PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
243 of PRIV avoids name clashes. */
244
245 #define PCRE2_PCRE2TEST
246 #include "pcre2_tables.c"
247 #include "pcre2_ucd.c"
248
249 /* 32-bit integer values in the input are read by strtoul() or strtol(). The
250 check needed for overflow depends on whether long ints are in fact longer than
251 ints. They are defined not to be shorter. */
252
/* U32OVERFLOW(x) tests an unsigned long result and S32OVERFLOW(x) a long
result. When long is wider than 32 bits an out-of-range value is detected by
comparison with the 32-bit limits; otherwise only the limit values themselves
can be tested for. The argument is parenthesized at each use so that any
expression may be passed; it is evaluated more than once in S32OVERFLOW, so it
must not have side effects. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
264
265 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
266 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
267 defined. We can now include it for each supported code unit width. Because
268 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
269 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
270 while including these files, and then restore it to a no-op. Because LINK_SIZE
271 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
272 these inclusions should not be changed. */
273
274 #undef PCRE2_SUFFIX
275 #undef PCRE2_CODE_UNIT_WIDTH
276
277 #ifdef SUPPORT_PCRE2_8
278 #define PCRE2_CODE_UNIT_WIDTH 8
279 #define PCRE2_SUFFIX(a) G(a,8)
280 #include "pcre2_intmodedep.h"
281 #include "pcre2_printint.c"
282 #undef PCRE2_CODE_UNIT_WIDTH
283 #undef PCRE2_SUFFIX
284 #endif /* SUPPORT_PCRE2_8 */
285
286 #ifdef SUPPORT_PCRE2_16
287 #define PCRE2_CODE_UNIT_WIDTH 16
288 #define PCRE2_SUFFIX(a) G(a,16)
289 #include "pcre2_intmodedep.h"
290 #include "pcre2_printint.c"
291 #undef PCRE2_CODE_UNIT_WIDTH
292 #undef PCRE2_SUFFIX
293 #endif /* SUPPORT_PCRE2_16 */
294
295 #ifdef SUPPORT_PCRE2_32
296 #define PCRE2_CODE_UNIT_WIDTH 32
297 #define PCRE2_SUFFIX(a) G(a,32)
298 #include "pcre2_intmodedep.h"
299 #include "pcre2_printint.c"
300 #undef PCRE2_CODE_UNIT_WIDTH
301 #undef PCRE2_SUFFIX
302 #endif /* SUPPORT_PCRE2_32 */
303
304 #define PCRE2_SUFFIX(a) a
305
306 /* We need to be able to check input text for UTF-8 validity, whatever code
307 widths are actually available, because the input to pcre2test is always in
308 8-bit code units. So we include the UTF validity checking function for 8-bit
309 code units. */
310
311 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
312
313 #define PCRE2_CODE_UNIT_WIDTH 8
314 #undef PCRE2_SPTR
315 #define PCRE2_SPTR PCRE2_SPTR8
316 #include "pcre2_valid_utf.c"
317 #undef PCRE2_CODE_UNIT_WIDTH
318 #undef PCRE2_SPTR
319
320 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
321 support, it can be selected by a command-line option. If there is no 8-bit
322 support, there must be 16- or 32-bit support, so default to one of them. The
323 config function, JIT stack, contexts, and version string are the same in all
324 modes, so use the form of the first that is available. */
325
/* Select the default test mode and the width-specific names used for the
mode-independent facilities. Each branch defines the same seven macros exactly
once; the first supported width in the order 8, 16, 32 wins. */

#if defined SUPPORT_PCRE2_8
#define DEFAULT_TEST_MODE PCRE8_MODE
#define VERSION_TYPE PCRE2_UCHAR8
#define PCRE2_CONFIG pcre2_config_8
#define PCRE2_JIT_STACK pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8

#elif defined SUPPORT_PCRE2_16
#define DEFAULT_TEST_MODE PCRE16_MODE
#define VERSION_TYPE PCRE2_UCHAR16
#define PCRE2_CONFIG pcre2_config_16
#define PCRE2_JIT_STACK pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16

#elif defined SUPPORT_PCRE2_32
#define DEFAULT_TEST_MODE PCRE32_MODE
#define VERSION_TYPE PCRE2_UCHAR32
#define PCRE2_CONFIG pcre2_config_32
#define PCRE2_JIT_STACK pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
#endif
354
355 /* ------------- Structure and table for handling #-commands ------------- */
356
/* One entry in the #-command table: a command name and the CMD_xxx code that
it maps to. */

typedef struct cmdstruct {
  const char *name;
  int value;
} cmdstruct;

/* Command codes. CMD_UNKNOWN has no entry in cmdlist.
NOTE(review): presumably CMD_UNKNOWN is what a failed lookup yields - confirm
against the code that searches the table. */

enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN,
  CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN };

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }};

/* Number of entries in cmdlist. The expansion is fully parenthesized so that
it behaves as a single operand in any surrounding expression. */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdlist[0]))
377
378 /* ------------- Structures and tables for handling modifiers -------------- */
379
380 /* Table of names for newline types. Must be kept in step with the definitions
381 of PCRE2_NEWLINE_xx in pcre2.h. */
382
383 static const char *newlines[] = {
384 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF" };
385
386 /* Modifier types and applicability */
387
388 enum { MOD_CTC, /* Applies to a compile context */
389 MOD_CTM, /* Applies to a match context */
390 MOD_PAT, /* Applies to a pattern */
391 MOD_PATP, /* Ditto, OK for Perl test */
392 MOD_DAT, /* Applies to a data line */
393 MOD_PD, /* Applies to a pattern or a data line */
394 MOD_PDP, /* As MOD_PD, OK for Perl test */
395 MOD_PND, /* As MOD_PD, but not for a default pattern */
396 MOD_PNDP, /* As MOD_PND, OK for Perl test */
397 MOD_CTL, /* Is a control bit */
398 MOD_BSR, /* Is a BSR value */
399 MOD_IN2, /* Is one or two unsigned integers */
400 MOD_INS, /* Is a signed integer */
401 MOD_INT, /* Is an unsigned integer */
402 MOD_IND, /* Is an unsigned integer, but no value => default */
403 MOD_NL, /* Is a newline value */
404 MOD_NN, /* Is a number or a name; more than one may occur */
405 MOD_OPT, /* Is an option bit */
406 MOD_SIZ, /* Is a PCRE2_SIZE value */
407 MOD_STR }; /* Is a string */
408
409 /* Control bits. Some apply to compiling, some to matching, but some can be set
410 either on a pattern or a data line, so they must all be distinct. There are now
411 so many of them that they are split into two fields. */
412
413 #define CTL_AFTERTEXT 0x00000001u
414 #define CTL_ALLAFTERTEXT 0x00000002u
415 #define CTL_ALLCAPTURES 0x00000004u
416 #define CTL_ALLUSEDTEXT 0x00000008u
417 #define CTL_ALTGLOBAL 0x00000010u
418 #define CTL_BINCODE 0x00000020u
419 #define CTL_CALLOUT_CAPTURE 0x00000040u
420 #define CTL_CALLOUT_INFO 0x00000080u
421 #define CTL_CALLOUT_NONE 0x00000100u
422 #define CTL_DFA 0x00000200u
423 #define CTL_EXPAND 0x00000400u
424 #define CTL_FINDLIMITS 0x00000800u
425 #define CTL_FRAMESIZE 0x00001000u
426 #define CTL_FULLBINCODE 0x00002000u
427 #define CTL_GETALL 0x00004000u
428 #define CTL_GLOBAL 0x00008000u
429 #define CTL_HEXPAT 0x00010000u /* Same word as USE_LENGTH */
430 #define CTL_INFO 0x00020000u
431 #define CTL_JITFAST 0x00040000u
432 #define CTL_JITVERIFY 0x00080000u
433 #define CTL_MARK 0x00100000u
434 #define CTL_MEMORY 0x00200000u
435 #define CTL_NULLCONTEXT 0x00400000u
436 #define CTL_POSIX 0x00800000u
437 #define CTL_POSIX_NOSUB 0x01000000u
438 #define CTL_PUSH 0x02000000u /* These three must be */
439 #define CTL_PUSHCOPY 0x04000000u /* all in the same */
440 #define CTL_PUSHTABLESCOPY 0x08000000u /* word. */
441 #define CTL_STARTCHAR 0x10000000u
442 #define CTL_USE_LENGTH 0x20000000u /* Same word as HEXPAT */
443 #define CTL_UTF8_INPUT 0x40000000u
444 #define CTL_ZERO_TERMINATE 0x80000000u
445
446 /* Second control word */
447
448 #define CTL2_SUBSTITUTE_EXTENDED 0x00000001u
449 #define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000002u
450 #define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000004u
451 #define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000008u
452
453 #define CTL_NL_SET 0x40000000u /* Informational */
454 #define CTL_BSR_SET 0x80000000u /* Informational */
455
456 /* Combinations */
457
458 #define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */
459 #define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO)
460 #define CTL_ANYGLOB (CTL_ALTGLOBAL|CTL_GLOBAL)
461
462 /* These are all the controls that may be set either on a pattern or on a
463 data line. */
464
465 #define CTL_ALLPD (CTL_AFTERTEXT|\
466 CTL_ALLAFTERTEXT|\
467 CTL_ALLCAPTURES|\
468 CTL_ALLUSEDTEXT|\
469 CTL_ALTGLOBAL|\
470 CTL_GLOBAL|\
471 CTL_MARK|\
472 CTL_MEMORY|\
473 CTL_STARTCHAR|\
474 CTL_UTF8_INPUT)
475
476 #define CTL2_ALLPD (CTL2_SUBSTITUTE_EXTENDED|\
477 CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
478 CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
479 CTL2_SUBSTITUTE_UNSET_EMPTY)
480
481 /* Structures for holding modifier information for patterns and subject strings
482 (data). Fields containing modifiers that can be set either for a pattern or a
483 subject must be at the start and in the same order in both cases so that the
484 same offset in the big table below works for both. */
485
486 typedef struct patctl { /* Structure for pattern modifiers. */
487 uint32_t options; /* Must be in same position as datctl */
488 uint32_t control; /* Must be in same position as datctl */
489 uint32_t control2; /* Must be in same position as datctl */
490 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
491 uint32_t jit;
492 uint32_t stackguard_test;
493 uint32_t tables_id;
494 uint32_t regerror_buffsize;
495 uint8_t locale[LOCALESIZE];
496 } patctl;
497
498 #define MAXCPYGET 10
499 #define LENCPYGET 64
500
501 typedef struct datctl { /* Structure for data line modifiers. */
502 uint32_t options; /* Must be in same position as patctl */
503 uint32_t control; /* Must be in same position as patctl */
504 uint32_t control2; /* Must be in same position as patctl */
505 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
506 uint32_t cerror[2];
507 uint32_t cfail[2];
508 int32_t callout_data;
509 int32_t copy_numbers[MAXCPYGET];
510 int32_t get_numbers[MAXCPYGET];
511 uint32_t jitstack;
512 uint32_t oveccount;
513 uint32_t offset;
514 uint8_t copy_names[LENCPYGET];
515 uint8_t get_names[LENCPYGET];
516 } datctl;
517
518 /* Ids for which context to modify. */
519
520 enum { CTX_PAT, /* Active pattern context */
521 CTX_POPPAT, /* Ditto, for a popped pattern */
522 CTX_DEFPAT, /* Default pattern context */
523 CTX_DAT, /* Active data (match) context */
524 CTX_DEFDAT }; /* Default data (match) context */
525
526 /* Macros to simplify the big table below. */
527
528 #define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
529 #define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
530 #define PO(name) offsetof(patctl, name)
531 #define PD(name) PO(name)
532 #define DO(name) offsetof(datctl, name)
533
534 /* Table of all long-form modifiers. Must be in collating sequence of modifier
535 name because it is searched by binary chop. */
536
537 typedef struct modstruct {
538 const char *name;
539 uint16_t which;
540 uint16_t type;
541 uint32_t value;
542 PCRE2_SIZE offset;
543 } modstruct;
544
545 static modstruct modlist[] = {
546 { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
547 { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
548 { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
549 { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
550 { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
551 { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
552 { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
553 { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
554 { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
555 { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
556 { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
557 { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
558 { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
559 { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
560 { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
561 { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
562 { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
563 { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
564 { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
565 { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
566 { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
567 { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
568 { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
569 { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
570 { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
571 { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
572 { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
573 { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
574 { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
575 { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
576 { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
577 { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
578 { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
579 { "framesize", MOD_PAT, MOD_CTL, CTL_FRAMESIZE, PO(control) },
580 { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
581 { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
582 { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
583 { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
584 { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
585 { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
586 { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
587 { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
588 { "jitstack", MOD_DAT, MOD_INT, 0, DO(jitstack) },
589 { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
590 { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
591 { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
592 { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
593 { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
594 { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
595 { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
596 { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
597 { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
598 { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
599 { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
600 { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
601 { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
602 { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
603 { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
604 { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) },
605 { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
606 { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
607 { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
608 { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
609 { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
610 { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
611 { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
612 { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
613 { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
614 { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
615 { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
616 { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
617 { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
618 { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
619 { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
620 { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
621 { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
622 { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
623 { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
624 { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
625 { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */
626 { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
627 { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
628 { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
629 { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
630 { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
631 { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
632 { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
633 { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
634 { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
635 { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
636 { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
637 { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
638 { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
639 { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
640 { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
641 { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
642 { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
643 };
644
/* Number of entries in modlist. Fully parenthesized so that the expansion
behaves as a single operand in any surrounding expression. */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modlist[0]))
646
647 /* Controls and options that are supported for use with the POSIX interface. */
648
649 #define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
650 PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_UCP|PCRE2_UTF| \
651 PCRE2_UNGREEDY)
652
653 #define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
654 CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_POSIX|CTL_POSIX_NOSUB)
655
656 #define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)
657
658 #define POSIX_SUPPORTED_MATCH_OPTIONS ( \
659 PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL)
660
661 #define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT)
662 #define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)
663
664 /* Control bits that are not ignored with 'push'. */
665
666 #define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
667 CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
668 CTL_JITVERIFY|CTL_MEMORY|CTL_FRAMESIZE|CTL_PUSH|CTL_PUSHCOPY| \
669 CTL_PUSHTABLESCOPY|CTL_USE_LENGTH)
670
671 #define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL_BSR_SET|CTL_NL_SET)
672
673 /* Controls that apply only at compile time with 'push'. */
674
675 #define PUSH_COMPILE_ONLY_CONTROLS CTL_JITVERIFY
676 #define PUSH_COMPILE_ONLY_CONTROLS2 (0)
677
678 /* Controls that are forbidden with #pop or #popcopy. */
679
680 #define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
681 CTL_PUSHCOPY|CTL_PUSHTABLESCOPY|CTL_USE_LENGTH)
682
683 /* Pattern controls that are mutually exclusive. At present these are all in
684 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
685 CTL_POSIX, so it doesn't need its own entries. */
686
687 static uint32_t exclusive_pat_controls[] = {
688 CTL_POSIX | CTL_HEXPAT,
689 CTL_POSIX | CTL_PUSH,
690 CTL_POSIX | CTL_PUSHCOPY,
691 CTL_POSIX | CTL_PUSHTABLESCOPY,
692 CTL_POSIX | CTL_USE_LENGTH,
693 CTL_EXPAND | CTL_HEXPAT };
694
695 /* Data controls that are mutually exclusive. At present these are all in the
696 first control word. */
697
698 static uint32_t exclusive_dat_controls[] = {
699 CTL_ALLUSEDTEXT | CTL_STARTCHAR,
700 CTL_FINDLIMITS | CTL_NULLCONTEXT };
701
702 /* Table of single-character abbreviated modifiers. The index field is
703 initialized to -1, but the first time the modifier is encountered, it is filled
704 in with the index of the full entry in modlist, to save repeated searching when
705 processing multiple test items. This short list is searched serially, so its
706 order does not matter. */
707
/* One single-character modifier: the full modifier name it abbreviates, the
abbreviating character, and a cached index that starts at -1. */

typedef struct c1modstruct {
  const char *fullname;
  uint32_t onechar;
  int index;
} c1modstruct;

static c1modstruct c1modlist[] = {
  { "bincode",   'B', -1 },
  { "info",      'I', -1 },
  { "global",    'g', -1 },
  { "caseless",  'i', -1 },
  { "multiline", 'm', -1 },
  { "dotall",    's', -1 },
  { "extended",  'x', -1 }
};

/* Number of entries in c1modlist. Fully parenthesized so that the expansion
behaves as a single operand in any surrounding expression. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modlist[0]))
725
726 /* Table of arguments for the -C command line option. Use macros to make the
727 table itself easier to read. */
728
729 #if defined SUPPORT_PCRE2_8
730 #define SUPPORT_8 1
731 #endif
732 #if defined SUPPORT_PCRE2_16
733 #define SUPPORT_16 1
734 #endif
735 #if defined SUPPORT_PCRE2_32
736 #define SUPPORT_32 1
737 #endif
738
739 #ifndef SUPPORT_8
740 #define SUPPORT_8 0
741 #endif
742 #ifndef SUPPORT_16
743 #define SUPPORT_16 0
744 #endif
745 #ifndef SUPPORT_32
746 #define SUPPORT_32 0
747 #endif
748
749 #ifdef EBCDIC
750 #define SUPPORT_EBCDIC 1
751 #define EBCDIC_NL CHAR_LF
752 #else
753 #define SUPPORT_EBCDIC 0
754 #define EBCDIC_NL 0
755 #endif
756
757 #ifdef NEVER_BACKSLASH_C
758 #define BACKSLASH_C 0
759 #else
760 #define BACKSLASH_C 1
761 #endif
762
763 typedef struct coptstruct {
764 const char *name;
765 uint32_t type;
766 uint32_t value;
767 } coptstruct;
768
769 enum { CONF_BSR,
770 CONF_FIX,
771 CONF_FIZ,
772 CONF_INT,
773 CONF_NL
774 };
775
776 static coptstruct coptlist[] = {
777 { "backslash-C", CONF_FIX, BACKSLASH_C },
778 { "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
779 { "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
780 { "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
781 { "jit", CONF_INT, PCRE2_CONFIG_JIT },
782 { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE },
783 { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE },
784 { "pcre2-16", CONF_FIX, SUPPORT_16 },
785 { "pcre2-32", CONF_FIX, SUPPORT_32 },
786 { "pcre2-8", CONF_FIX, SUPPORT_8 },
787 { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
788 };
789
790 #define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct)
791
792 #undef SUPPORT_8
793 #undef SUPPORT_16
794 #undef SUPPORT_32
795 #undef SUPPORT_EBCDIC
796
797
798 /* ----------------------- Static variables ------------------------ */
799
800 static FILE *infile;
801 static FILE *outfile;
802
803 static const void *last_callout_mark;
804 static PCRE2_JIT_STACK *jit_stack = NULL;
805 static size_t jit_stack_size = 0;
806
807 static BOOL first_callout;
808 static BOOL jit_was_used;
809 static BOOL restrict_for_perl_test = FALSE;
810 static BOOL show_memory = FALSE;
811
812 static int code_unit_size; /* Bytes */
813 static int jitrc; /* Return from JIT compile */
814 static int test_mode = DEFAULT_TEST_MODE;
815 static int timeit = 0;
816 static int timeitm = 0;
817
818 clock_t total_compile_time = 0;
819 clock_t total_jit_compile_time = 0;
820 clock_t total_match_time = 0;
821
822 static uint32_t dfa_matched;
823 static uint32_t forbid_utf = 0;
824 static uint32_t maxlookbehind;
825 static uint32_t max_oveccount;
826 static uint32_t callout_count;
827
828 static uint16_t local_newline_default = 0;
829
830 static VERSION_TYPE jittarget[VERSION_SIZE];
831 static VERSION_TYPE version[VERSION_SIZE];
832 static VERSION_TYPE uversion[VERSION_SIZE];
833
834 static patctl def_patctl;
835 static patctl pat_patctl;
836 static datctl def_datctl;
837 static datctl dat_datctl;
838
839 static void *patstack[PATSTACKSIZE];
840 static int patstacknext = 0;
841
842 #ifdef SUPPORT_PCRE2_8
843 static regex_t preg = { NULL, NULL, 0, 0, 0 };
844 #endif
845
846 static int *dfa_workspace = NULL;
847 static const uint8_t *locale_tables = NULL;
848 static const uint8_t *use_tables = NULL;
849 static uint8_t locale_name[32];
850
/* Buffers are needed for building 16-bit and 32-bit strings. 8-bit strings
need no rebuilding, but the same naming scheme is set up for them so that the
macros can treat all widths alike. Every input line is read into "buffer",
whose size is the same as that of pbuffer8. Pattern lines are always copied to
pbuffer8 for use in callouts, even when they are really compiled from
pbuffer16 or pbuffer32. */

static size_t   pbuffer8_size = 50000;    /* Initial size, bytes */
static uint8_t *pbuffer8      = NULL;
static uint8_t *buffer        = NULL;

/* All processed data lines go into dbuffer. In the non-8-bit modes it is cast
as required, and it grows as necessary to hold long data lines. */

static size_t   dbuffer_size = 1u << 14;  /* Initial size, bytes */
static uint8_t *dbuffer      = NULL;
866
867
/* ---------------- Mode-dependent variables -------------------*/

/* There is one set of these per supported code unit width. The mode-selecting
macros refer to them by a base name with the width (8, 16 or 32) appended. */

#if defined SUPPORT_PCRE2_8
static pcre2_code_8            *compiled_code8;
static pcre2_general_context_8 *general_context8;
static pcre2_general_context_8 *general_context_copy8;
static pcre2_compile_context_8 *pat_context8;
static pcre2_compile_context_8 *default_pat_context8;
static pcre2_match_context_8   *dat_context8;
static pcre2_match_context_8   *default_dat_context8;
static pcre2_match_data_8      *match_data8;
#endif

#if defined SUPPORT_PCRE2_16
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static PCRE2_SIZE                pbuffer16_size = 0;   /* Set only when needed */
static uint16_t                 *pbuffer16 = NULL;
#endif

#if defined SUPPORT_PCRE2_32
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static PCRE2_SIZE                pbuffer32_size = 0;   /* Set only when needed */
static uint32_t                 *pbuffer32 = NULL;
#endif
897
898
/* ---------------- Macros that work in all modes ----------------- */

/* CAST8VAR(x)   CASTVAR applied to x with the type uint8_t *
   SET(x,y)      SETOP with the operator =
   SETPLUS(x,y)  SETOP with the operator +=
   strlen8(x)    strlen() applied to x viewed as a char pointer; the argument
                 is parenthesized so that the cast covers the whole of it */

#define CAST8VAR(x)   CASTVAR(uint8_t *, x)
#define SET(x,y)      SETOP(x,y,=)
#define SETPLUS(x,y)  SETOP(x,y,+=)
#define strlen8(x)    strlen((char *)(x))
905
906
907 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
908
909 /* Define macros for variables and functions that must be selected dynamically
910 depending on the mode setting (8, 16, 32). These are dependent on which modes
911 are supported. */
912
913 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
914 defined (SUPPORT_PCRE2_32)) >= 2
915
916 /* ----- All three modes supported ----- */
917
918 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
919
/* Field "fld" of the current mode's variant of "var", cast to "type". */

#define CASTFLD(type,var,fld) \
  ((test_mode == PCRE8_MODE)?  (type)(G(var,8)->fld) : \
   (test_mode == PCRE16_MODE)? (type)(G(var,16)->fld) : \
                               (type)(G(var,32)->fld))

/* The current mode's variant of "var", cast to "type". */

#define CASTVAR(type,var) ( \
  (test_mode == PCRE8_MODE)?  (type)G(var,8) : \
  (test_mode == PCRE16_MODE)? (type)G(var,16) : \
                              (type)G(var,32))

/* Code unit number "idx" of the string at "ptr", widened to uint32_t. */

#define CODE_UNIT(ptr,idx) ( \
  (test_mode == PCRE8_MODE)?  (uint32_t)(((PCRE2_SPTR8)(ptr))[idx]) : \
  (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(ptr))[idx]) : \
                              (uint32_t)(((PCRE2_SPTR32)(ptr))[idx]))

/* Copy one match context over another (current mode's variants). */

#define DATCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8),G(src,8),sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16),G(src,16),sizeof(pcre2_match_context_16)); \
  else \
    memcpy(G(dst,32),G(src,32),sizeof(pcre2_match_context_32))

/* Field "fld" of the current mode's variant of "var", uncast. */

#define FLD(var,fld) \
  ((test_mode == PCRE8_MODE)?  G(var,8)->fld : \
   (test_mode == PCRE16_MODE)? G(var,16)->fld : \
                               G(var,32)->fld)

/* Copy one compile context over another (current mode's variants). */

#define PATCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8),G(src,8),sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16),G(src,16),sizeof(pcre2_compile_context_16)); \
  else \
    memcpy(G(dst,32),G(src,32),sizeof(pcre2_compile_context_32))
948
/* Call the width-specific pchars function on (base + offset), where base is
viewed as a pointer to code units of the current width; len, utf and f are
passed straight through. PCHARS stores the function's result in "result". */

#define PCHARS(result, base, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    result = pchars32((PCRE2_SPTR32)(base)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    result = pchars16((PCRE2_SPTR16)(base)+offset, len, utf, f); \
  else \
    result = pchars8((PCRE2_SPTR8)(base)+offset, len, utf, f)

/* As PCHARS, but the function's result is explicitly discarded. */

#define PCHARSV(base, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    (void)pchars32((PCRE2_SPTR32)(base)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    (void)pchars16((PCRE2_SPTR16)(base)+offset, len, utf, f); \
  else \
    (void)pchars8((PCRE2_SPTR8)(base)+offset, len, utf, f)
964
/* Enumerate the callouts of the current mode's compiled_code, casting
"callback" to the width-specific function pointer type; the result is stored
in "rc". */

#define PCRE2_CALLOUT_ENUMERATE(rc,callback,data) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_callout_enumerate_8(compiled_code8, \
      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))callback,data); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_callout_enumerate_16(compiled_code16, \
      (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))callback,data); \
  else \
    rc = pcre2_callout_enumerate_32(compiled_code32, \
      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))callback,data)

/* Copy the code at "src" into the current mode's variant of "var". */

#define PCRE2_CODE_COPY_FROM_VOID(var,src) \
  if (test_mode == PCRE8_MODE) \
    G(var,8) = pcre2_code_copy_8(src); \
  else if (test_mode == PCRE16_MODE) \
    G(var,16) = pcre2_code_copy_16(src); \
  else \
    G(var,32) = pcre2_code_copy_32(src)

/* Copy the current mode's variant of "var" and store the copy, as a void
pointer, in "dst". */

#define PCRE2_CODE_COPY_TO_VOID(dst,var) \
  if (test_mode == PCRE8_MODE) \
    dst = (void *)pcre2_code_copy_8(G(var,8)); \
  else if (test_mode == PCRE16_MODE) \
    dst = (void *)pcre2_code_copy_16(G(var,16)); \
  else \
    dst = (void *)pcre2_code_copy_32(G(var,32))

/* As PCRE2_CODE_COPY_TO_VOID, using the copy-with-tables function. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,var) \
  if (test_mode == PCRE8_MODE) \
    dst = (void *)pcre2_code_copy_with_tables_8(G(var,8)); \
  else if (test_mode == PCRE16_MODE) \
    dst = (void *)pcre2_code_copy_with_tables_16(G(var,16)); \
  else \
    dst = (void *)pcre2_code_copy_with_tables_32(G(var,32))
999
/* Compile the current mode's variant of "pat" into the current mode's variant
of "code"; the remaining arguments are passed through unchanged. */

#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroffset,cctx) \
  if (test_mode == PCRE8_MODE) \
    G(code,8) = pcre2_compile_8(G(pat,8),len,opts,errcode,erroffset,cctx); \
  else if (test_mode == PCRE16_MODE) \
    G(code,16) = pcre2_compile_16(G(pat,16),len,opts,errcode,erroffset,cctx); \
  else \
    G(code,32) = pcre2_compile_32(G(pat,32),len,opts,errcode,erroffset,cctx)

/* DFA match. "code" and "mdata" are base names that get the width suffix;
"subject" is cast to the width-specific string pointer type. */

#define PCRE2_DFA_MATCH(rc,code,subject,len,start,opts,mdata,mctx,ws,wscount) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_dfa_match_8(G(code,8),(PCRE2_SPTR8)subject,len,start,opts, \
      G(mdata,8),mctx,ws,wscount); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_dfa_match_16(G(code,16),(PCRE2_SPTR16)subject,len,start,opts, \
      G(mdata,16),mctx,ws,wscount); \
  else \
    rc = pcre2_dfa_match_32(G(code,32),(PCRE2_SPTR32)subject,len,start,opts, \
      G(mdata,32),mctx,ws,wscount)

/* Get the message for "errcode" into the current mode's variant of "buf",
whose length is taken from the matching <buf><width>_size variable. */

#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_get_error_message_8(errcode,G(buf,8),G(G(buf,8),_size)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_get_error_message_16(errcode,G(buf,16),G(G(buf,16),_size)); \
  else \
    rc = pcre2_get_error_message_32(errcode,G(buf,32),G(G(buf,32),_size))

/* Store the ovector count of the current mode's "mdata" in "count". */

#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  if (test_mode == PCRE8_MODE) \
    count = pcre2_get_ovector_count_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) \
    count = pcre2_get_ovector_count_16(G(mdata,16)); \
  else \
    count = pcre2_get_ovector_count_32(G(mdata,32))

/* Store the start character value of the current mode's "mdata" in "start". */

#define PCRE2_GET_STARTCHAR(start,mdata) \
  if (test_mode == PCRE8_MODE) \
    start = pcre2_get_startchar_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) \
    start = pcre2_get_startchar_16(G(mdata,16)); \
  else \
    start = pcre2_get_startchar_32(G(mdata,32))

/* JIT-compile the current mode's variant of "code" with options "opts". */

#define PCRE2_JIT_COMPILE(rc,code,opts) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_jit_compile_8(G(code,8),opts); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_jit_compile_16(G(code,16),opts); \
  else \
    rc = pcre2_jit_compile_32(G(code,32),opts)

/* Call the JIT free-unused-memory function on the current mode's "ctx". */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_free_unused_memory_8(G(ctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_free_unused_memory_16(G(ctx,16)); \
  else \
    pcre2_jit_free_unused_memory_32(G(ctx,32))

/* JIT match. "code" and "mdata" are base names that get the width suffix;
"subject" is cast to the width-specific string pointer type. */

#define PCRE2_JIT_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_jit_match_8(G(code,8),(PCRE2_SPTR8)subject,len,start,opts, \
      G(mdata,8),mctx); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_jit_match_16(G(code,16),(PCRE2_SPTR16)subject,len,start,opts, \
      G(mdata,16),mctx); \
  else \
    rc = pcre2_jit_match_32(G(code,32),(PCRE2_SPTR32)subject,len,start,opts, \
      G(mdata,32),mctx)
1057
/* JIT stack handling. Note that, unlike most macros in this section, the
expansions of these three already end with a semicolon. */

/* Create a JIT stack and store it, cast to PCRE2_JIT_STACK *, in "stack". */

#define PCRE2_JIT_STACK_CREATE(stack,startsize,maxsize,gctx) \
  if (test_mode == PCRE8_MODE) \
    stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(startsize,maxsize,gctx); \
  else if (test_mode == PCRE16_MODE) \
    stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(startsize,maxsize,gctx); \
  else \
    stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(startsize,maxsize,gctx);

/* Assign a JIT stack callback to the current mode's variant of "mctx". */

#define PCRE2_JIT_STACK_ASSIGN(mctx,callback,cbdata) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(mctx,8),(pcre2_jit_callback_8)callback,cbdata); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(mctx,16),(pcre2_jit_callback_16)callback,cbdata); \
  else \
    pcre2_jit_stack_assign_32(G(mctx,32),(pcre2_jit_callback_32)callback,cbdata);

/* Free "stack", cast to the width-specific JIT stack type. */

#define PCRE2_JIT_STACK_FREE(stack) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)stack); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)stack); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)stack);
1081
/* Build character tables (NULL is passed as the argument) into "tables". */

#define PCRE2_MAKETABLES(tables) \
  if (test_mode == PCRE8_MODE) \
    tables = pcre2_maketables_8(NULL); \
  else if (test_mode == PCRE16_MODE) \
    tables = pcre2_maketables_16(NULL); \
  else \
    tables = pcre2_maketables_32(NULL)

/* Ordinary match. "code" and "mdata" are base names that get the width
suffix; "subject" is cast to the width-specific string pointer type. */

#define PCRE2_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_match_8(G(code,8),(PCRE2_SPTR8)subject,len,start,opts, \
      G(mdata,8),mctx); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_match_16(G(code,16),(PCRE2_SPTR16)subject,len,start,opts, \
      G(mdata,16),mctx); \
  else \
    rc = pcre2_match_32(G(code,32),(PCRE2_SPTR32)subject,len,start,opts, \
      G(mdata,32),mctx)

/* Create a match data block in the current mode's variant of "mdata". */

#define PCRE2_MATCH_DATA_CREATE(mdata,oveccount,gctx) \
  if (test_mode == PCRE8_MODE) \
    G(mdata,8) = pcre2_match_data_create_8(oveccount,gctx); \
  else if (test_mode == PCRE16_MODE) \
    G(mdata,16) = pcre2_match_data_create_16(oveccount,gctx); \
  else \
    G(mdata,32) = pcre2_match_data_create_32(oveccount,gctx)

/* As above, but sized from the current mode's variant of "code". */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == PCRE8_MODE) \
    G(mdata,8) = pcre2_match_data_create_from_pattern_8(G(code,8),gctx); \
  else if (test_mode == PCRE16_MODE) \
    G(mdata,16) = pcre2_match_data_create_from_pattern_16(G(code,16),gctx); \
  else \
    G(mdata,32) = pcre2_match_data_create_from_pattern_32(G(code,32),gctx)

/* Free the current mode's variant of "mdata". */

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == PCRE8_MODE) \
    pcre2_match_data_free_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_match_data_free_16(G(mdata,16)); \
  else \
    pcre2_match_data_free_32(G(mdata,32))

/* Query the current mode's variant of "code"; "what" and "where" are passed
through. */

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_pattern_info_8(G(code,8),what,where); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_pattern_info_16(G(code,16),what,where); \
  else \
    rc = pcre2_pattern_info_32(G(code,32),what,where)

/* Print the current mode's compiled_code to outfile; "arg" is passed on as
the final argument of the printint function. */

#define PCRE2_PRINTINT(arg) \
  if (test_mode == PCRE8_MODE) \
    pcre2_printint_8(compiled_code8,outfile,arg); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_printint_16(compiled_code16,outfile,arg); \
  else \
    pcre2_printint_32(compiled_code32,outfile,arg)

/* Decode serialized patterns; "codes" is cast to a width-specific vector of
code pointers and "gctx" is a base name that gets the width suffix. */

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_serialize_decode_8((pcre2_code_8 **)codes,count,bytes,G(gctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_serialize_decode_16((pcre2_code_16 **)codes,count,bytes,G(gctx,16)); \
  else \
    rc = pcre2_serialize_decode_32((pcre2_code_32 **)codes,count,bytes,G(gctx,32))

/* Encode patterns into serialized form; argument handling as for decode. */

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_serialize_encode_8((const pcre2_code_8 **)codes,count, \
      bytesptr,sizeptr,G(gctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_serialize_encode_16((const pcre2_code_16 **)codes,count, \
      bytesptr,sizeptr,G(gctx,16)); \
  else \
    rc = pcre2_serialize_encode_32((const pcre2_code_32 **)codes,count, \
      bytesptr,sizeptr,G(gctx,32))

/* Free serialized data. */

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == PCRE8_MODE) \
    pcre2_serialize_free_8(bytes); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_serialize_free_16(bytes); \
  else \
    pcre2_serialize_free_32(bytes)
1158
/* Store in "rc" the number of codes held in the serialized data "bytes",
using the function for the current mode. Like the two-mode definition of this
macro, the expansion ends without a semicolon and without a line continuation,
so the caller supplies the terminating semicolon and the macro can be used as
the body of an if that has an else. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_serialize_get_number_of_codes_8(bytes); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_serialize_get_number_of_codes_16(bytes); \
  else \
    rc = pcre2_serialize_get_number_of_codes_32(bytes)
1166
/* Context setters. In each one the first argument is a base name that gets
the width suffix appended; the other arguments are passed through. */

/* Set a callout function, cast to the width-specific pointer type. Note that
the expansion of this macro already ends with a semicolon. */

#define PCRE2_SET_CALLOUT(ctx,callback,data) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(ctx,8), \
      (int (*)(pcre2_callout_block_8 *, void *))callback,data); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(ctx,16), \
      (int (*)(pcre2_callout_block_16 *, void *))callback,data); \
  else \
    pcre2_set_callout_32(G(ctx,32), \
      (int (*)(pcre2_callout_block_32 *, void *))callback,data);

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_character_tables_8(G(ctx,8),tables); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_character_tables_16(G(ctx,16),tables); \
  else \
    pcre2_set_character_tables_32(G(ctx,32),tables)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_compile_recursion_guard_8(G(ctx,8),guard,data); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_compile_recursion_guard_16(G(ctx,16),guard,data); \
  else \
    pcre2_set_compile_recursion_guard_32(G(ctx,32),guard,data)

#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_depth_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_depth_limit_16(G(ctx,16),limit); \
  else \
    pcre2_set_depth_limit_32(G(ctx,32),limit)

#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_match_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_match_limit_16(G(ctx,16),limit); \
  else \
    pcre2_set_match_limit_32(G(ctx,32),limit)

#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,maxlen) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_max_pattern_length_8(G(ctx,8),maxlen); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_max_pattern_length_16(G(ctx,16),maxlen); \
  else \
    pcre2_set_max_pattern_length_32(G(ctx,32),maxlen)

#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_offset_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_offset_limit_16(G(ctx,16),limit); \
  else \
    pcre2_set_offset_limit_32(G(ctx,32),limit)

#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_parens_nest_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_parens_nest_limit_16(G(ctx,16),limit); \
  else \
    pcre2_set_parens_nest_limit_32(G(ctx,32),limit)
1230
/* Substitution. "code", "mdata" and "mctx" are base names that get the width
suffix; the subject, replacement and output buffer are cast to the
width-specific character pointer types. */

#define PCRE2_SUBSTITUTE(rc,code,subject,len,start,opts,mdata,mctx,repl,rlen,outbuf,outlen) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substitute_8(G(code,8),(PCRE2_SPTR8)subject,len,start,opts, \
      G(mdata,8),G(mctx,8),(PCRE2_SPTR8)repl,rlen,(PCRE2_UCHAR8 *)outbuf,outlen); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substitute_16(G(code,16),(PCRE2_SPTR16)subject,len,start,opts, \
      G(mdata,16),G(mctx,16),(PCRE2_SPTR16)repl,rlen,(PCRE2_UCHAR16 *)outbuf,outlen); \
  else \
    rc = pcre2_substitute_32(G(code,32),(PCRE2_SPTR32)subject,len,start,opts, \
      G(mdata,32),G(mctx,32),(PCRE2_SPTR32)repl,rlen,(PCRE2_UCHAR32 *)outbuf,outlen)

/* Substring extraction. "mdata" (and "name" or "code" where present) are base
names that get the width suffix; buffer arguments are cast to the
width-specific character types; the result goes into the first argument. */

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,buflen) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_copy_byname_8(G(mdata,8),G(name,8), \
      (PCRE2_UCHAR8 *)buf,buflen); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_copy_byname_16(G(mdata,16),G(name,16), \
      (PCRE2_UCHAR16 *)buf,buflen); \
  else \
    rc = pcre2_substring_copy_byname_32(G(mdata,32),G(name,32), \
      (PCRE2_UCHAR32 *)buf,buflen)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,buflen) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_copy_bynumber_8(G(mdata,8),num, \
      (PCRE2_UCHAR8 *)buf,buflen); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_copy_bynumber_16(G(mdata,16),num, \
      (PCRE2_UCHAR16 *)buf,buflen); \
  else \
    rc = pcre2_substring_copy_bynumber_32(G(mdata,32),num, \
      (PCRE2_UCHAR32 *)buf,buflen)

#define PCRE2_SUBSTRING_FREE(ptr) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_free_8((PCRE2_UCHAR8 *)ptr); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)ptr); \
  else \
    pcre2_substring_free_32((PCRE2_UCHAR32 *)ptr)

#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,buflen) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_get_byname_8(G(mdata,8),G(name,8), \
      (PCRE2_UCHAR8 **)bufptr,buflen); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_get_byname_16(G(mdata,16),G(name,16), \
      (PCRE2_UCHAR16 **)bufptr,buflen); \
  else \
    rc = pcre2_substring_get_byname_32(G(mdata,32),G(name,32), \
      (PCRE2_UCHAR32 **)bufptr,buflen)

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,buflen) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_get_bynumber_8(G(mdata,8),num, \
      (PCRE2_UCHAR8 **)bufptr,buflen); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_get_bynumber_16(G(mdata,16),num, \
      (PCRE2_UCHAR16 **)bufptr,buflen); \
  else \
    rc = pcre2_substring_get_bynumber_32(G(mdata,32),num, \
      (PCRE2_UCHAR32 **)bufptr,buflen)

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_length_byname_8(G(mdata,8),G(name,8),lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_length_byname_16(G(mdata,16),G(name,16),lenptr); \
  else \
    rc = pcre2_substring_length_byname_32(G(mdata,32),G(name,32),lenptr)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_length_bynumber_8(G(mdata,8),num,lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_length_bynumber_16(G(mdata,16),num,lenptr); \
  else \
    rc = pcre2_substring_length_bynumber_32(G(mdata,32),num,lenptr)

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_list_get_8(G(mdata,8),(PCRE2_UCHAR8 ***)listptr,lensptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_list_get_16(G(mdata,16),(PCRE2_UCHAR16 ***)listptr,lensptr); \
  else \
    rc = pcre2_substring_list_get_32(G(mdata,32),(PCRE2_UCHAR32 ***)listptr,lensptr)

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_list_free_8((PCRE2_SPTR8 *)list); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_list_free_16((PCRE2_SPTR16 *)list); \
  else \
    pcre2_substring_list_free_32((PCRE2_SPTR32 *)list)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(num,code,name) \
  if (test_mode == PCRE8_MODE) \
    num = pcre2_substring_number_from_name_8(G(code,8),G(name,8)); \
  else if (test_mode == PCRE16_MODE) \
    num = pcre2_substring_number_from_name_16(G(code,16),G(name,16)); \
  else \
    num = pcre2_substring_number_from_name_32(G(code,32),G(name,32))
1319
/* The current mode's variant of "var", as a void pointer. */

#define PTR(var) ( \
  (test_mode == PCRE8_MODE)?  (void *)G(var,8) : \
  (test_mode == PCRE16_MODE)? (void *)G(var,16) : \
                              (void *)G(var,32))

/* Assign "value" to field "fld" of the current mode's variant of "var". */

#define SETFLD(var,fld,value) \
  if (test_mode == PCRE8_MODE) \
    G(var,8)->fld = value; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16)->fld = value; \
  else \
    G(var,32)->fld = value

/* As SETFLD, but the field is a vector and element "idx" is assigned. */

#define SETFLDVEC(var,fld,idx,value) \
  if (test_mode == PCRE8_MODE) \
    G(var,8)->fld[idx] = value; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16)->fld[idx] = value; \
  else \
    G(var,32)->fld[idx] = value

/* Apply the assignment operator "op", with right-hand side "value", to the
current mode's variant of "var". */

#define SETOP(var,value,op) \
  if (test_mode == PCRE8_MODE) \
    G(var,8) op value; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16) op value; \
  else \
    G(var,32) op value

/* Assign "value", cast to a pointer to code units of the current width, to
the current mode's variant of "var". */

#define SETCASTPTR(var,value) \
  if (test_mode == PCRE8_MODE) \
    G(var,8) = (uint8_t *)(value); \
  else if (test_mode == PCRE16_MODE) \
    G(var,16) = (uint16_t *)(value); \
  else \
    G(var,32) = (uint32_t *)(value)

/* Length of the string at "str" as an int, using the string length function
for the current mode. */

#define STRLEN(str) ( \
  (test_mode == PCRE8_MODE)?  ((int)strlen((char *)str)) : \
  (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)str)) : \
                              ((int)strlen32((PCRE2_SPTR32)str)))

/* Call the current mode's variant of "func" on the current mode's variant(s)
of its one (SUB1) or two (SUB2) arguments. */

#define SUB1(func,arg) \
  if (test_mode == PCRE8_MODE) \
    G(func,8)(G(arg,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(func,16)(G(arg,16)); \
  else \
    G(func,32)(G(arg,32))

#define SUB2(func,arg1,arg2) \
  if (test_mode == PCRE8_MODE) \
    G(func,8)(G(arg1,8),G(arg2,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(func,16)(G(arg1,16),G(arg2,16)); \
  else \
    G(func,32)(G(arg1,32),G(arg2,32))

/* True if the current mode's variant of "var" stands in relation "rel" to
"value". */

#define TEST(var,rel,value) ( \
  (test_mode == PCRE8_MODE  && G(var,8) rel (value)) || \
  (test_mode == PCRE16_MODE && G(var,16) rel (value)) || \
  (test_mode == PCRE32_MODE && G(var,32) rel (value)))

/* As TEST, applied to field "fld" of the current mode's variant of "var". */

#define TESTFLD(var,fld,rel,value) ( \
  (test_mode == PCRE8_MODE  && G(var,8)->fld rel (value)) || \
  (test_mode == PCRE16_MODE && G(var,16)->fld rel (value)) || \
  (test_mode == PCRE32_MODE && G(var,32)->fld rel (value)))
1371
1372
1373 /* ----- Two out of three modes are supported ----- */
1374
1375 #else
1376
1377 /* We can use some macro trickery to make a single set of definitions work in
1378 the three different cases. */
1379
/* Choose the two widths. BITONE is the larger of the two supported widths and
BITTWO the smaller: 32 and 16 when both of those are supported, otherwise 32
and 8 when both of those are supported, otherwise 16 and 8. */

#if defined(SUPPORT_PCRE2_32) && \
    (defined(SUPPORT_PCRE2_16) || defined(SUPPORT_PCRE2_8))
#define BITONE 32
#if defined(SUPPORT_PCRE2_16)
#define BITTWO 16      /* 32-bit and 16-bit */
#else
#define BITTWO 8       /* 32-bit and 8-bit but not 16-bit */
#endif
#else
#define BITONE 16      /* 16-bit and 8-bit but not 32-bit */
#define BITTWO 8
#endif
1398
1399
1400 /* ----- Common macros for two-mode cases ----- */
1401
/* In all the two-mode macros, the BITONE variant is selected when test_mode
equals PCRE<BITONE>_MODE, and the BITTWO variant otherwise. */

/* Field "fld" of the selected variant of "var", cast to "type". */

#define CASTFLD(type,var,fld) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)(G(var,BITONE)->fld) : (type)(G(var,BITTWO)->fld))

/* The selected variant of "var", cast to "type". */

#define CASTVAR(type,var) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)G(var,BITONE) : (type)G(var,BITTWO))

/* Code unit number "idx" of the string at "ptr", widened to uint32_t. */

#define CODE_UNIT(ptr,idx) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (uint32_t)(((G(PCRE2_SPTR,BITONE))(ptr))[idx]) : \
    (uint32_t)(((G(PCRE2_SPTR,BITTWO))(ptr))[idx]))

/* Copy one match context over another (selected variants). */

#define DATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_match_context_,BITTWO)))

/* Field "fld" of the selected variant of "var", uncast. */

#define FLD(var,fld) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(var,BITONE)->fld : G(var,BITTWO)->fld)

/* Copy one compile context over another (selected variants). */

#define PATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_compile_context_,BITTWO)))
1429
/* Call the width-specific pchars function on (base + offset), where base is
viewed as a pointer to code units of the selected width; len, utf and f are
passed straight through. PCHARS stores the function's result in "result". */

#define PCHARS(result, base, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    result = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(base)+offset, len, utf, f); \
  else \
    result = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(base)+offset, len, utf, f)

/* As PCHARS, but the function's result is explicitly discarded. */

#define PCHARSV(base, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(base)+offset, len, utf, f); \
  else \
    (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(base)+offset, len, utf, f)
1441
/* Enumerate the callouts of the selected compiled_code variable, casting
"callback" to the width-specific function pointer type; the result is stored
in "rc". The width is pasted onto "pcre2_callout_enumerate_" (with its trailing
underscore), as every other macro in this section does for its own function
and as the three-mode definition spells out explicitly. Without the underscore
the pasted name is only right if "pcre2_callout_enumerate" is itself a macro
that expands to the underscored form.
NOTE(review): presumably pcre2.h provides such an alias, which would explain
why the old spelling compiled; confirm. */

#define PCRE2_CALLOUT_ENUMERATE(rc,callback,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))callback,data); \
  else \
    rc = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))callback,data)
1449
/* Copy the code at "src" into the selected variant of "var". */

#define PCRE2_CODE_COPY_FROM_VOID(var,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) = G(pcre2_code_copy_,BITONE)(src); \
  else \
    G(var,BITTWO) = G(pcre2_code_copy_,BITTWO)(src)

/* Copy the selected variant of "var" and store the copy, as a void pointer,
in "dst". */

#define PCRE2_CODE_COPY_TO_VOID(dst,var) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_,BITONE)(G(var,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_,BITTWO)(G(var,BITTWO))

/* As PCRE2_CODE_COPY_TO_VOID, using the copy-with-tables function. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,var) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(var,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(var,BITTWO))

/* Compile the selected variant of "pat" into the selected variant of "code";
the remaining arguments are passed through unchanged. */

#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroffset,cctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(code,BITONE) = G(pcre2_compile_,BITONE)(G(pat,BITONE), \
      len,opts,errcode,erroffset,cctx); \
  else \
    G(code,BITTWO) = G(pcre2_compile_,BITTWO)(G(pat,BITTWO), \
      len,opts,errcode,erroffset,cctx)

/* DFA match. "code" and "mdata" are base names that get the width suffix;
"subject" is cast to the width-specific string pointer type. */

#define PCRE2_DFA_MATCH(rc,code,subject,len,start,opts,mdata,mctx,ws,wscount) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_dfa_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,len,start,opts, \
      G(mdata,BITONE),mctx,ws,wscount); \
  else \
    rc = G(pcre2_dfa_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,len,start,opts, \
      G(mdata,BITTWO),mctx,ws,wscount)

/* Get the message for "errcode" into the selected variant of "buf", whose
length is taken from the matching <buf><width>_size variable. */

#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_error_message_,BITONE)(errcode,G(buf,BITONE), \
      G(G(buf,BITONE),_size)); \
  else \
    rc = G(pcre2_get_error_message_,BITTWO)(errcode,G(buf,BITTWO), \
      G(G(buf,BITTWO),_size))

/* Store the ovector count of the selected "mdata" in "count". */

#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    count = G(pcre2_get_ovector_count_,BITONE)(G(mdata,BITONE)); \
  else \
    count = G(pcre2_get_ovector_count_,BITTWO)(G(mdata,BITTWO))

/* Store the start character value of the selected "mdata" in "start". */

#define PCRE2_GET_STARTCHAR(start,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    start = G(pcre2_get_startchar_,BITONE)(G(mdata,BITONE)); \
  else \
    start = G(pcre2_get_startchar_,BITTWO)(G(mdata,BITTWO))

/* JIT-compile the selected variant of "code" with options "opts". */

#define PCRE2_JIT_COMPILE(rc,code,opts) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_compile_,BITONE)(G(code,BITONE),opts); \
  else \
    rc = G(pcre2_jit_compile_,BITTWO)(G(code,BITTWO),opts)

/* Call the JIT free-unused-memory function on the selected "ctx". */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)(G(ctx,BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_,BITTWO)(G(ctx,BITTWO))

/* JIT match. "code" and "mdata" are base names that get the width suffix;
"subject" is cast to the width-specific string pointer type. */

#define PCRE2_JIT_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,len,start,opts, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_jit_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,len,start,opts, \
      G(mdata,BITTWO),mctx)
1519
/* Create a JIT stack with the function for the selected width and store it,
cast to PCRE2_JIT_STACK *, in "stack". The expansion deliberately still ends
with a semicolon, as it always has, so existing uses are unaffected; what has
gone is the line-continuation backslash that used to follow that semicolon,
which would have pulled whatever came on the next line into this macro. */

#define PCRE2_JIT_STACK_CREATE(stack,startsize,maxsize,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    stack = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(startsize,maxsize,gctx); \
  else \
    stack = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(startsize,maxsize,gctx);
1525
/* Assign a JIT stack callback to the selected variant of "mctx". Note that
the expansions of this macro and the next already end with a semicolon. */

#define PCRE2_JIT_STACK_ASSIGN(mctx,callback,cbdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(mctx,BITONE), \
      (G(pcre2_jit_callback_,BITONE))callback,cbdata); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(mctx,BITTWO), \
      (G(pcre2_jit_callback_,BITTWO))callback,cbdata);

/* Free "stack", cast to the width-specific JIT stack type. */

#define PCRE2_JIT_STACK_FREE(stack) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)stack); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)stack);
1537
/* Build character tables (NULL is passed as the argument) into "tables". */

#define PCRE2_MAKETABLES(tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    tables = G(pcre2_maketables_,BITONE)(NULL); \
  else \
    tables = G(pcre2_maketables_,BITTWO)(NULL)

/* Ordinary match. "code" and "mdata" are base names that get the width
suffix; "subject" is cast to the width-specific string pointer type. */

#define PCRE2_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,len,start,opts, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,len,start,opts, \
      G(mdata,BITTWO),mctx)

/* Create a match data block in the selected variant of "mdata". */

#define PCRE2_MATCH_DATA_CREATE(mdata,oveccount,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_,BITONE)(oveccount,gctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_,BITTWO)(oveccount,gctx)

/* As above, but sized from the selected variant of "code". */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = \
      G(pcre2_match_data_create_from_pattern_,BITONE)(G(code,BITONE),gctx); \
  else \
    G(mdata,BITTWO) = \
      G(pcre2_match_data_create_from_pattern_,BITTWO)(G(code,BITTWO),gctx)

/* Free the selected variant of "mdata". */

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)(G(mdata,BITONE)); \
  else \
    G(pcre2_match_data_free_,BITTWO)(G(mdata,BITTWO))

/* Query the selected variant of "code"; "what" and "where" are passed
through. */

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_info_,BITONE)(G(code,BITONE),what,where); \
  else \
    rc = G(pcre2_pattern_info_,BITTWO)(G(code,BITTWO),what,where)

/* Print the selected compiled_code to outfile; "arg" is passed on as the
final argument of the printint function. */

#define PCRE2_PRINTINT(arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,arg); \
  else \
    G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,arg)

/* Decode serialized patterns; "codes" is cast to a width-specific vector of
code pointers and "gctx" is a base name that gets the width suffix. */

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_decode_,BITONE)( \
      (G(pcre2_code_,BITONE) **)codes,count,bytes,G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_decode_,BITTWO)( \
      (G(pcre2_code_,BITTWO) **)codes,count,bytes,G(gctx,BITTWO))

/* Encode patterns into serialized form; argument handling as for decode. */

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_encode_,BITONE)( \
      (const G(pcre2_code_,BITONE) **)codes,count,bytesptr,sizeptr, \
      G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_encode_,BITTWO)( \
      (const G(pcre2_code_,BITTWO) **)codes,count,bytesptr,sizeptr, \
      G(gctx,BITTWO))

/* Free serialized data. */

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(bytes); \
  else \
    G(pcre2_serialize_free_,BITTWO)(bytes)

/* Store in "rc" the number of codes held in the serialized data "bytes". */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_get_number_of_codes_,BITONE)(bytes); \
  else \
    rc = G(pcre2_serialize_get_number_of_codes_,BITTWO)(bytes)
1605
/* Call pcre2_set_callout_<width>() on the width-suffixed context a, passing
function b (cast to the width-specific callout signature) and user data c, for
whichever of the two compiled-in code unit widths test_mode selects. The
expansion deliberately does not end with a semicolon: the caller supplies it,
so that "if (x) PCRE2_SET_CALLOUT(...); else ..." parses as intended. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1613
1614 #define PCRE2_SET_CHARACTER_TABLES(a,b) \
1615 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1616 G(pcre2_set_character_tables_,BITONE)(G(a,BITONE),b); \
1617 else \
1618 G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)
1619
1620 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
1621 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1622 G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b,c); \
1623 else \
1624 G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c)
1625
1626 #define PCRE2_SET_DEPTH_LIMIT(a,b) \
1627 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1628 G(pcre2_set_depth_limit_,BITONE)(G(a,BITONE),b); \
1629 else \
1630 G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
1631
1632 #define PCRE2_SET_MATCH_LIMIT(a,b) \
1633 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1634 G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
1635 else \
1636 G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b)
1637
1638 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
1639 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1640 G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \
1641 else \
1642 G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b)
1643
1644 #define PCRE2_SET_OFFSET_LIMIT(a,b) \
1645 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1646 G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \
1647 else \
1648 G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b)
1649
1650 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
1651 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1652 G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \
1653 else \
1654 G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b)
1655
1656 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
1657 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1658 a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1659 G(g,BITONE),G(h,BITONE),(G(PCRE2_SPTR,BITONE))i,j, \
1660 (G(PCRE2_UCHAR,BITONE) *)k,l); \
1661 else \
1662 a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1663 G(g,BITTWO),G(h,BITTWO),(G(PCRE2_SPTR,BITTWO))i,j, \
1664 (G(PCRE2_UCHAR,BITTWO) *)k,l)
1665
1666 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
1667 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1668 a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
1669 (G(PCRE2_UCHAR,BITONE) *)d,e); \
1670 else \
1671 a = G(pcre2_substring_copy_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\
1672 (G(PCRE2_UCHAR,BITTWO) *)d,e)
1673
1674 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
1675 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1676 a = G(pcre2_substring_copy_bynumber_,BITONE)(G(b,BITONE),c,\
1677 (G(PCRE2_UCHAR,BITONE) *)d,e); \
1678 else \
1679 a = G(pcre2_substring_copy_bynumber_,BITTWO)(G(b,BITTWO),c,\
1680 (G(PCRE2_UCHAR,BITTWO) *)d,e)
1681
1682 #define PCRE2_SUBSTRING_FREE(a) \
1683 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1684 G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
1685 else G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)
1686
1687 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
1688 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1689 a = G(pcre2_substring_get_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
1690 (G(PCRE2_UCHAR,BITONE) **)d,e); \
1691 else \
1692 a = G(pcre2_substring_get_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\
1693 (G(PCRE2_UCHAR,BITTWO) **)d,e)
1694
1695 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
1696 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1697 a = G(pcre2_substring_get_bynumber_,BITONE)(G(b,BITONE),c,\
1698 (G(PCRE2_UCHAR,BITONE) **)d,e); \
1699 else \
1700 a = G(pcre2_substring_get_bynumber_,BITTWO)(G(b,BITTWO),c,\
1701 (G(PCRE2_UCHAR,BITTWO) **)d,e)
1702
1703 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
1704 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1705 a = G(pcre2_substring_length_byname_,BITONE)(G(b,BITONE),G(c,BITONE),d); \
1706 else \
1707 a = G(pcre2_substring_length_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),d)
1708
1709 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
1710 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1711 a = G(pcre2_substring_length_bynumber_,BITONE)(G(b,BITONE),c,d); \
1712 else \
1713 a = G(pcre2_substring_length_bynumber_,BITTWO)(G(b,BITTWO),c,d)
1714
1715 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
1716 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1717 a = G(pcre2_substring_list_get_,BITONE)(G(b,BITONE), \
1718 (G(PCRE2_UCHAR,BITONE) ***)c,d); \
1719 else \
1720 a = G(pcre2_substring_list_get_,BITTWO)(G(b,BITTWO), \
1721 (G(PCRE2_UCHAR,BITTWO) ***)c,d)
1722
1723 #define PCRE2_SUBSTRING_LIST_FREE(a) \
1724 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1725 G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)a); \
1726 else \
1727 G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)a)
1728
1729 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
1730 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1731 a = G(pcre2_substring_number_from_name_,BITONE)(G(b,BITONE),G(c,BITONE)); \
1732 else \
1733 a = G(pcre2_substring_number_from_name_,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1734
1735 #define PTR(x) ( \
1736 (test_mode == G(G(PCRE,BITONE),_MODE))? (void *)G(x,BITONE) : \
1737 (void *)G(x,BITTWO))
1738
1739 #define SETFLD(x,y,z) \
1740 if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y = z; \
1741 else G(x,BITTWO)->y = z
1742
1743 #define SETFLDVEC(x,y,v,z) \
1744 if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y[v] = z; \
1745 else G(x,BITTWO)->y[v] = z
1746
1747 #define SETOP(x,y,z) \
1748 if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE) z y; \
1749 else G(x,BITTWO) z y
1750
1751 #define SETCASTPTR(x,y) \
1752 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1753 G(x,BITONE) = (G(G(uint,BITONE),_t) *)(y); \
1754 else \
1755 G(x,BITTWO) = (G(G(uint,BITTWO),_t) *)(y)
1756
1757 #define STRLEN(p) ((test_mode == G(G(PCRE,BITONE),_MODE))? \
1758 G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p) : \
1759 G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p))
1760
1761 #define SUB1(a,b) \
1762 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1763 G(a,BITONE)(G(b,BITONE)); \
1764 else \
1765 G(a,BITTWO)(G(b,BITTWO))
1766
/* Call the function whose name is a with the code unit width appended, passing
the similarly suffixed variables b and c, for whichever of the two compiled-in
widths test_mode selects. (A stray closing parenthesis after the function name
previously made every use of this macro a syntax error.) */

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1772
1773 #define TEST(x,r,y) ( \
1774 (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE) r (y)) || \
1775 (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO) r (y)))
1776
1777 #define TESTFLD(x,f,r,y) ( \
1778 (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE)->f r (y)) || \
1779 (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO)->f r (y)))
1780
1781
1782 #endif /* Two out of three modes */
1783
1784 /* ----- End of cases where more than one mode is supported ----- */
1785
1786
1787 /* ----- Only 8-bit mode is supported ----- */
1788
1789 #elif defined SUPPORT_PCRE2_8
1790 #define CASTFLD(t,a,b) (t)(G(a,8)->b)
1791 #define CASTVAR(t,x) (t)G(x,8)
1792 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b])
1793 #define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8))
1794 #define FLD(a,b) G(a,8)->b
1795 #define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8))
1796 #define PCHARS(lv, p, offset, len, utf, f) \
1797 lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1798 #define PCHARSV(p, offset, len, utf, f) \
1799 (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1800 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
1801 a = pcre2_callout_enumerate_8(compiled_code8, \
1802 (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
1803 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b)
1804 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8))
1805 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8))
1806 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
1807 G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g)
1808 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
1809 a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j)
1810 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
1811 r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size))
1812 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8))
1813 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8))
1814 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a,8),b)
1815 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8))
1816 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
1817 a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
/* JIT stack wrappers for the 8-bit-only build: create a stack and store it in
a, assign callback b and its argument c to the 8-suffixed context a, and free
the stack a. None of the expansions ends with a semicolon; the caller supplies
it, so each macro can be the body of an unbraced if/else. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a)
1823 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_8(NULL)
1824 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
1825 a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
1826 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c)
1827 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
1828 G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c)
1829 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
1830 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
1831 #define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
1832 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
1833 r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8))
1834 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
1835 r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8))
1836 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_8(a)
1837 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
1838 r = pcre2_serialize_get_number_of_codes_8(a)
1839 #define PCRE2_SET_CALLOUT(a,b,c) \
1840 pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c)
1841 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
1842 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
1843 pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
1844 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
1845 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
1846 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
1847 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
1848 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
1849 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
1850 a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
1851 (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
1852 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
1853 a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
1854 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
1855 a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e)
1856 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
1857 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
1858 a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e)
1859 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
1860 a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e)
1861 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
1862 a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d)
1863 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
1864 a = pcre2_substring_length_bynumber_8(G(b,8),c,d)
1865 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
1866 a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d)
1867 #define PCRE2_SUBSTRING_LIST_FREE(a) \
1868 pcre2_substring_list_free_8((PCRE2_SPTR8 *)a)
/* Store in a the result of pcre2_substring_number_from_name_8() applied to the
8-suffixed variables b and c. No trailing semicolon: the caller supplies it, as
with the multi-mode definition of this macro. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
1871 #define PTR(x) (void *)G(x,8)
1872 #define SETFLD(x,y,z) G(x,8)->y = z
1873 #define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z
1874 #define SETOP(x,y,z) G(x,8) z y
1875 #define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y)
/* Length of the zero-terminated 8-bit string at p, as an int. The argument is
parenthesized so that the cast applies to the whole expression passed in, not
just to its first operand. */

#define STRLEN(p) (int)strlen((char *)(p))
1877 #define SUB1(a,b) G(a,8)(G(b,8))
1878 #define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8))
1879 #define TEST(x,r,y) (G(x,8) r (y))
1880 #define TESTFLD(x,f,r,y) (G(x,8)->f r (y))
1881
1882
1883 /* ----- Only 16-bit mode is supported ----- */
1884
1885 #elif defined SUPPORT_PCRE2_16
1886 #define CASTFLD(t,a,b) (t)(G(a,16)->b)
1887 #define CASTVAR(t,x) (t)G(x,16)
1888 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
1889 #define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
1890 #define FLD(a,b) G(a,16)->b
1891 #define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))
1892 #define PCHARS(lv, p, offset, len, utf, f) \
1893 lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
1894 #define PCHARSV(p, offset, len, utf, f) \
1895 (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
1896 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
1897 a = pcre2_callout_enumerate_16(compiled_code16, \
1898 (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
1899 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
1900 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
1901 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
1902 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
1903 G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
1904 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
1905 a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j)
1906 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
1907 r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size))
1908 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16))
1909 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
1910 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b)
1911 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
1912 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
1913 a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
/* JIT stack wrappers for the 16-bit-only build: create a stack and store it in
a, assign callback b and its argument c to the 16-suffixed context a, and free
the stack a. None of the expansions ends with a semicolon; the caller supplies
it, so each macro can be the body of an unbraced if/else. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a)
1919 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL)
1920 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
1921 a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
1922 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
1923 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
1924 G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
1925 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
1926 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d)
1927 #define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
1928 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
1929 r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16))
1930 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
1931 r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16))
1932 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
1933 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
1934 r = pcre2_serialize_get_number_of_codes_16(a)
/* Call pcre2_set_callout_16() on the 16-suffixed context a, passing function b
(cast to the 16-bit callout signature) and user data c. No trailing semicolon:
the caller supplies it, matching the 8-bit definition of this macro. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
1937 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
1938 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
1939 pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
1940 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
1941 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
1942 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
1943 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
1944 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
1945 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
1946 a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
1947 (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
1948 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
1949 a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
1950 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
1951 a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
1952 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
1953 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
1954 a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e)
1955 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
1956 a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e)
1957 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
1958 a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d)
1959 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
1960 a = pcre2_substring_length_bynumber_16(G(b,16),c,d)
1961 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
1962 a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d)
1963 #define PCRE2_SUBSTRING_LIST_FREE(a) \
1964 pcre2_substring_list_free_16((PCRE2_SPTR16 *)a)
/* Store in a the result of pcre2_substring_number_from_name_16() applied to
the 16-suffixed variables b and c. No trailing semicolon: the caller supplies
it, as with the multi-mode definition of this macro. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_16(G(b,16),G(c,16))
1967 #define PTR(x) (void *)G(x,16)
1968 #define SETFLD(x,y,z) G(x,16)->y = z
1969 #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
1970 #define SETOP(x,y,z) G(x,16) z y
1971 #define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
/* Length, as an int, of the zero-terminated 16-bit string at p, computed by
strlen16(). The argument is parenthesized so that the cast applies to the whole
expression passed in, not just to its first operand. */

#define STRLEN(p) (int)strlen16((PCRE2_SPTR16)(p))
1973 #define SUB1(a,b) G(a,16)(G(b,16))
1974 #define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
1975 #define TEST(x,r,y) (G(x,16) r (y))
1976 #define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
1977
1978
1979 /* ----- Only 32-bit mode is supported ----- */
1980
1981 #elif defined SUPPORT_PCRE2_32
1982 #define CASTFLD(t,a,b) (t)(G(a,32)->b)
1983 #define CASTVAR(t,x) (t)G(x,32)
1984 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
1985 #define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
1986 #define FLD(a,b) G(a,32)->b
1987 #define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
1988 #define PCHARS(lv, p, offset, len, utf, f) \
1989 lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
1990 #define PCHARSV(p, offset, len, utf, f) \
1991 (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
1992 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
1993 a = pcre2_callout_enumerate_32(compiled_code32, \
1994 (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
1995 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
1996 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
1997 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
1998 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
1999 G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
2000 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
2001 a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
2002 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
2003 r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size))
2004 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
2005 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
2006 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
2007 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
2008 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
2009 a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
/* JIT stack wrappers for the 32-bit-only build: create a stack and store it in
a, assign callback b and its argument c to the 32-suffixed context a, and free
the stack a. None of the expansions ends with a semicolon; the caller supplies
it, so each macro can be the body of an unbraced if/else. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
2015 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
2016 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
2017 a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
2018 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
2019 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
2020 G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
2021 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
2022 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
2023 #define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
2024 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2025 r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
2026 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2027 r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
2028 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
2029 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2030 r = pcre2_serialize_get_number_of_codes_32(a)
/* Call pcre2_set_callout_32() on the 32-suffixed context a, passing function b
(cast to the 32-bit callout signature) and user data c. No trailing semicolon:
the caller supplies it, matching the 8-bit definition of this macro. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
2033 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
2034 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2035 pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
2036 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
2037 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
2038 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
2039 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
2040 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
2041 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2042 a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
2043 (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
2044 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2045 a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
/* Store in a the result of pcre2_substring_copy_bynumber_32() called with the
32-suffixed variable b, the number c, buffer d (cast to PCRE2_UCHAR32 *) and
e. No trailing semicolon: the caller supplies it, matching the 8-bit and 16-bit
definitions of this macro. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
2048 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
2049 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2050 a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
2051 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2052 a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
2053 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2054 a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
2055 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2056 a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
2057 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2058 a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
2059 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2060 pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
/* Store in a the result of pcre2_substring_number_from_name_32() applied to
the 32-suffixed variables b and c. No trailing semicolon: the caller supplies
it, as with the multi-mode definition of this macro. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
2063 #define PTR(x) (void *)G(x,32)
2064 #define SETFLD(x,y,z) G(x,32)->y = z
2065 #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
2066 #define SETOP(x,y,z) G(x,32) z y
2067 #define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
/* Length, as an int, of the zero-terminated 32-bit string at p, computed by
strlen32(). The argument is parenthesized so that the cast applies to the whole
expression passed in, not just to its first operand. */

#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)(p))
2069 #define SUB1(a,b) G(a,32)(G(b,32))
2070 #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
2071 #define TEST(x,r,y) (G(x,32) r (y))
2072 #define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2073
2074 #endif
2075
2076 /* ----- End of mode-specific function call macros ----- */
2077
2078
2079
2080
2081 /*************************************************
2082 * Alternate character tables *
2083 *************************************************/
2084
2085 /* By default, the "tables" pointer in the compile context when calling
2086 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2087 library. However, the tables modifier can be used to select alternate sets of
2088 tables, for different kinds of testing. Note that the locale modifier also
2089 adjusts the tables. */
2090
2091 /* This is the set of tables distributed as default with PCRE2. It recognizes
2092 only ASCII characters. */
2093
2094 static const uint8_t tables1[] = {
2095
2096 /* This table is a lower casing table. */
2097
2098 0, 1, 2, 3, 4, 5, 6, 7,
2099 8, 9, 10, 11, 12, 13, 14, 15,
2100 16, 17, 18, 19, 20, 21, 22, 23,
2101 24, 25, 26, 27, 28, 29, 30, 31,
2102 32, 33, 34, 35, 36, 37, 38, 39,
2103 40, 41, 42, 43, 44, 45, 46, 47,
2104 48, 49, 50, 51, 52, 53, 54, 55,
2105 56, 57, 58, 59, 60, 61, 62, 63,
2106 64, 97, 98, 99,100,101,102,103,
2107 104,105,106,107,108,109,110,111,
2108 112,113,114,115,116,117,118,119,
2109 120,121,122, 91, 92, 93, 94, 95,
2110 96, 97, 98, 99,100,101,102,103,
2111 104,105,106,107,108,109,110,111,
2112 112,113,114,115,116,117,118,119,
2113 120,121,122,123,124,125,126,127,
2114 128,129,130,131,132,133,134,135,
2115 136,137,138,139,140,141,142,143,
2116 144,145,146,147,148,149,150,151,
2117 152,153,154,155,156,157,158,159,
2118 160,161,162,163,164,165,166,167,
2119 168,169,170,171,172,173,174,175,
2120 176,177,178,179,180,181,182,183,
2121 184,185,186,187,188,189,190,191,
2122 192,193,194,195,196,197,198,199,
2123 200,201,202,203,204,205,206,207,
2124 208,209,210,211,212,213,214,215,
2125 216,217,218,219,220,221,222,223,
2126 224,225,226,227,228,229,230,231,
2127 232,233,234,235,236,237,238,239,
2128 240,241,242,243,244,245,246,247,
2129 248,249,250,251,252,253,254,255,
2130
2131 /* This table is a case flipping table. */
2132
2133 0, 1, 2, 3, 4, 5, 6, 7,
2134 8, 9, 10, 11, 12, 13, 14, 15,
2135 16, 17, 18, 19, 20, 21, 22, 23,
2136 24, 25, 26, 27, 28, 29, 30, 31,
2137 32, 33, 34, 35, 36, 37, 38, 39,
2138 40, 41, 42, 43, 44, 45, 46, 47,
2139 48, 49, 50, 51, 52, 53, 54, 55,
2140 56, 57, 58, 59, 60, 61, 62, 63,
2141 64, 97, 98, 99,100,101,102,103,
2142 104,105,106,107,108,109,110,111,
2143 112,113,114,115,116,117,118,119,
2144 120,121,122, 91, 92, 93, 94, 95,
2145 96, 65, 66, 67, 68, 69, 70, 71,
2146 72, 73, 74, 75, 76, 77, 78, 79,
2147 80, 81, 82, 83, 84, 85, 86, 87,
2148 88, 89, 90,123,124,125,126,127,
2149 128,129,130,131,132,133,134,135,
2150 136,137,138,139,140,141,142,143,
2151 144,145,146,147,148,149,150,151,
2152 152,153,154,155,156,157,158,159,
2153 160,161,162,163,164,165,166,167,
2154 168,169,170,171,172,173,174,175,
2155 176,177,178,179,180,181,182,183,
2156 184,185,186,187,188,189,190,191,
2157 192,193,194,195,196,197,198,199,
2158 200,201,202,203,204,205,206,207,
2159 208,209,210,211,212,213,214,215,
2160 216,217,218,219,220,221,222,223,
2161 224,225,226,227,228,229,230,231,
2162 232,233,234,235,236,237,238,239,
2163 240,241,242,243,244,245,246,247,
2164 248,249,250,251,252,253,254,255,
2165
2166 /* This table contains bit maps for various character classes. Each map is 32
2167 bytes long and the bits run from the least significant end of each byte. The
2168 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
2169 graph, print, punct, and cntrl. Other classes are built from combinations. */
2170
2171 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
2172 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2173 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2174 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2175
2176 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2177 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
2178 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2179 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2180
2181 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2182 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2183 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2184 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2185
2186 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2187 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
2188 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2189 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2190
2191 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2192 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
2193 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2194 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2195
2196 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2197 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
2198 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2199 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2200
2201 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
2202 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
2203 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2204 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2205
2206 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
2207 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
2208 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2209 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2210
2211 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
2212 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
2213 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2214 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2215
2216 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
2217 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
2218 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2219 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2220
2221 /* This table identifies various classes of character by individual bits:
2222 0x01 white space character
2223 0x02 letter
2224 0x04 decimal digit
2225 0x08 hexadecimal digit
2226 0x10 alphanumeric or '_'
2227 0x80 regular expression metacharacter or binary zero
2228 */
2229
2230 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
2231 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
2232 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
2233 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
2234 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
2235 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
2236 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
2237 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
2238 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
2239 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
2240 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
2241 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
2242 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
2243 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
2244 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
2245 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
2246 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
2247 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
2248 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
2249 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
2250 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
2251 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
2252 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
2253 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
2254 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
2255 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
2256 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
2257 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
2258 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
2259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
2260 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
2261 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2262
2263 /* This is a set of tables that came originally from a Windows user. It seems
2264 to be at least an approximation of ISO 8859. In particular, there are
2265 characters greater than 128 that are marked as spaces, letters, etc. */
2266
/* The initializer below holds 1088 bytes, laid out as four consecutive
sub-tables. The section comments describe what the data itself shows. */

2267 static const uint8_t tables2[] = {

/* Entries 0-255: lower-casing map. Identity except that 65-90 (A-Z) map to
97-122 and 192-222 map to 224-254; 215 and 223 map to themselves. */

2268 0,1,2,3,4,5,6,7,
2269 8,9,10,11,12,13,14,15,
2270 16,17,18,19,20,21,22,23,
2271 24,25,26,27,28,29,30,31,
2272 32,33,34,35,36,37,38,39,
2273 40,41,42,43,44,45,46,47,
2274 48,49,50,51,52,53,54,55,
2275 56,57,58,59,60,61,62,63,
2276 64,97,98,99,100,101,102,103,
2277 104,105,106,107,108,109,110,111,
2278 112,113,114,115,116,117,118,119,
2279 120,121,122,91,92,93,94,95,
2280 96,97,98,99,100,101,102,103,
2281 104,105,106,107,108,109,110,111,
2282 112,113,114,115,116,117,118,119,
2283 120,121,122,123,124,125,126,127,
2284 128,129,130,131,132,133,134,135,
2285 136,137,138,139,140,141,142,143,
2286 144,145,146,147,148,149,150,151,
2287 152,153,154,155,156,157,158,159,
2288 160,161,162,163,164,165,166,167,
2289 168,169,170,171,172,173,174,175,
2290 176,177,178,179,180,181,182,183,
2291 184,185,186,187,188,189,190,191,
2292 224,225,226,227,228,229,230,231,
2293 232,233,234,235,236,237,238,239,
2294 240,241,242,243,244,245,246,215,
2295 248,249,250,251,252,253,254,223,
2296 224,225,226,227,228,229,230,231,
2297 232,233,234,235,236,237,238,239,
2298 240,241,242,243,244,245,246,247,
2299 248,249,250,251,252,253,254,255,

/* Entries 256-511: case-flipping map. A-Z and a-z are swapped, as are 192-222
and 224-254; 215, 223, 247 and 255 map to themselves. */

2300 0,1,2,3,4,5,6,7,
2301 8,9,10,11,12,13,14,15,
2302 16,17,18,19,20,21,22,23,
2303 24,25,26,27,28,29,30,31,
2304 32,33,34,35,36,37,38,39,
2305 40,41,42,43,44,45,46,47,
2306 48,49,50,51,52,53,54,55,
2307 56,57,58,59,60,61,62,63,
2308 64,97,98,99,100,101,102,103,
2309 104,105,106,107,108,109,110,111,
2310 112,113,114,115,116,117,118,119,
2311 120,121,122,91,92,93,94,95,
2312 96,65,66,67,68,69,70,71,
2313 72,73,74,75,76,77,78,79,
2314 80,81,82,83,84,85,86,87,
2315 88,89,90,123,124,125,126,127,
2316 128,129,130,131,132,133,134,135,
2317 136,137,138,139,140,141,142,143,
2318 144,145,146,147,148,149,150,151,
2319 152,153,154,155,156,157,158,159,
2320 160,161,162,163,164,165,166,167,
2321 168,169,170,171,172,173,174,175,
2322 176,177,178,179,180,181,182,183,
2323 184,185,186,187,188,189,190,191,
2324 224,225,226,227,228,229,230,231,
2325 232,233,234,235,236,237,238,239,
2326 240,241,242,243,244,245,246,215,
2327 248,249,250,251,252,253,254,223,
2328 192,193,194,195,196,197,198,199,
2329 200,201,202,203,204,205,206,207,
2330 208,209,210,211,212,213,214,247,
2331 216,217,218,219,220,221,222,255,

/* Entries 512-831: 320 bytes. NOTE(review): presumably ten 32-byte character
class bit maps (one bit per character), as in PCRE2's built-in tables - confirm
against the library's table layout. */

2332 0,62,0,0,1,0,0,0,
2333 0,0,0,0,0,0,0,0,
2334 32,0,0,0,1,0,0,0,
2335 0,0,0,0,0,0,0,0,
2336 0,0,0,0,0,0,255,3,
2337 126,0,0,0,126,0,0,0,
2338 0,0,0,0,0,0,0,0,
2339 0,0,0,0,0,0,0,0,
2340 0,0,0,0,0,0,255,3,
2341 0,0,0,0,0,0,0,0,
2342 0,0,0,0,0,0,12,2,
2343 0,0,0,0,0,0,0,0,
2344 0,0,0,0,0,0,0,0,
2345 254,255,255,7,0,0,0,0,
2346 0,0,0,0,0,0,0,0,
2347 255,255,127,127,0,0,0,0,
2348 0,0,0,0,0,0,0,0,
2349 0,0,0,0,254,255,255,7,
2350 0,0,0,0,0,4,32,4,
2351 0,0,0,128,255,255,127,255,
2352 0,0,0,0,0,0,255,3,
2353 254,255,255,135,254,255,255,7,
2354 0,0,0,0,0,4,44,6,
2355 255,255,127,255,255,255,127,255,
2356 0,0,0,0,254,255,255,255,
2357 255,255,255,255,255,255,255,127,
2358 0,0,0,0,254,255,255,255,
2359 255,255,255,255,255,255,255,255,
2360 0,2,0,0,255,255,255,255,
2361 255,255,255,255,255,255,255,127,
2362 0,0,0,0,255,255,255,255,
2363 255,255,255,255,255,255,255,255,
2364 0,0,0,0,254,255,0,252,
2365 1,0,0,248,1,0,0,120,
2366 0,0,0,0,254,255,255,255,
2367 0,0,128,0,0,0,128,0,
2368 255,255,255,255,0,0,0,0,
2369 0,0,0,0,0,0,0,128,
2370 255,255,255,255,0,0,0,0,
2371 0,0,0,0,0,0,0,0,

/* Entries 832-1087: one flag byte per character, in decimal. The values follow
the bit assignments listed for the preceding table (0x01 space, 0x02 letter,
0x04 digit, 0x08 hex digit, 0x10 alphanumeric or '_', 0x80 metacharacter or
binary zero): e.g. '0' is 28 (0x1c) and 'A' is 26 (0x1a). Unlike that table,
some characters above 127 carry non-zero flags. */

2372 128,0,0,0,0,0,0,0,
2373 0,1,1,0,1,1,0,0,
2374 0,0,0,0,0,0,0,0,
2375 0,0,0,0,0,0,0,0,
2376 1,0,0,0,128,0,0,0,
2377 128,128,128,128,0,0,128,0,
2378 28,28,28,28,28,28,28,28,
2379 28,28,0,0,0,0,0,128,
2380 0,26,26,26,26,26,26,18,
2381 18,18,18,18,18,18,18,18,
2382 18,18,18,18,18,18,18,18,
2383 18,18,18,128,128,0,128,16,
2384 0,26,26,26,26,26,26,18,
2385 18,18,18,18,18,18,18,18,
2386 18,18,18,18,18,18,18,18,
2387 18,18,18,128,128,0,0,0,
2388 0,0,0,0,0,1,0,0,
2389 0,0,0,0,0,0,0,0,
2390 0,0,0,0,0,0,0,0,
2391 0,0,0,0,0,0,0,0,
2392 1,0,0,0,0,0,0,0,
2393 0,0,18,0,0,0,0,0,
2394 0,0,20,20,0,18,0,0,
2395 0,20,18,0,0,0,0,0,
2396 18,18,18,18,18,18,18,18,
2397 18,18,18,18,18,18,18,18,
2398 18,18,18,18,18,18,18,0,
2399 18,18,18,18,18,18,18,18,
2400 18,18,18,18,18,18,18,18,
2401 18,18,18,18,18,18,18,18,
2402 18,18,18,18,18,18,18,0,
2403 18,18,18,18,18,18,18,18
2404 };
2405
2406
2407 #ifndef HAVE_STRERROR
2408 /*************************************************
2409 * Provide strerror() for non-ANSI libraries *
2410 *************************************************/
2411
2412 /* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
2413 libraries. They may no longer be around, but just in case, we can try to
2414 provide the same facility by this simple alternative function. */
2415
2416 extern int sys_nerr;
2417 extern char *sys_errlist[];
2418
2419 char *
2420 strerror(int n)
2421 {
2422 if (n < 0 || n >= sys_nerr) return "unknown error number";
2423 return sys_errlist[n];
2424 }
2425 #endif /* HAVE_STRERROR */
2426
2427
2428
2429 /*************************************************
2430 * Local memory functions *
2431 *************************************************/
2432
2433 /* Alternative memory functions, to test functionality. */
2434
2435 static void *my_malloc(size_t size, void *data)
2436 {
2437 void *block = malloc(size);
2438 (void)data;
2439 if (show_memory)
2440 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2441 return block;
2442 }
2443
2444 static void my_free(void *block, void *data)
2445 {
2446 (void)data;
2447 if (show_memory)
2448 fprintf(outfile, "free %p\n", block);
2449 free(block);
2450 }
2451
2452
2453
2454 /*************************************************
2455 * Callback function for stack guard *
2456 *************************************************/
2457
2458 /* This is set up to be called from pcre2_compile() when the stackguard=n
2459 modifier sets a value greater than zero. The test we do is whether the
2460 parenthesis nesting depth is greater than the value set by the modifier.
2461
2462 Argument: the current parenthesis nesting depth
2463 Returns: non-zero to kill the compilation
2464 */
2465
2466 static int
2467 stack_guard(uint32_t depth, void *user_data)
2468 {
2469 (void)user_data;
2470 return depth > pat_patctl.stackguard_test;
2471 }
2472
2473
2474 /*************************************************
2475 * JIT memory callback *
2476 *************************************************/
2477
2478 static PCRE2_JIT_STACK*
2479 jit_callback(void *arg)
2480 {
2481 jit_was_used = TRUE;
2482 return (PCRE2_JIT_STACK *)arg;
2483 }
2484
2485
2486 /*************************************************
2487 * Convert UTF-8 character to code point *
2488 *************************************************/
2489
2490 /* This function reads one or more bytes that represent a UTF-8 character,
2491 and returns the codepoint of that character. Note that the function supports
2492 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2493 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2494 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2495 checking, and also for generating 32-bit non-UTF data values above the UTF
2496 limit.
2497
2498 Argument:
2499 utf8bytes a pointer to the byte vector
2500 vptr a pointer to an int to receive the value
2501
2502 Returns: > 0 => the number of bytes consumed
2503 -6 to 0 => malformed UTF-8 character at offset = (-return)
2504 */
2505
2506 static int
2507 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2508 {
2509 uint32_t c = *utf8bytes++;
2510 uint32_t d = c;
2511 int i, j, s;
2512
2513 for (i = -1; i < 6; i++) /* i is number of additional bytes */
2514 {
2515 if ((d & 0x80) == 0) break;
2516 d <<= 1;
2517 }
2518
2519 if (i == -1) { *vptr = c; return 1; } /* ascii character */
2520 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
2521
2522 /* i now has a value in the range 1-5 */
2523
2524 s = 6*i;
2525 d = (c & utf8_table3[i]) << s;
2526
2527 for (j = 0; j < i; j++)
2528 {
2529 c = *utf8bytes++;
2530 if ((c & 0xc0) != 0x80) return -(j+1);
2531 s -= 6;
2532 d |= (c & 0x3f) << s;
2533 }
2534
2535 /* Check that encoding was the correct unique one */
2536
2537 for (j = 0; j < utf8_table1_size; j++)
2538 if (d <= (uint32_t)utf8_table1[j]) break;
2539 if (j != i) return -(i+1);
2540
2541 /* Valid value */
2542
2543 *vptr = d;
2544 return i+1;
2545 }
2546
2547
2548
2549 /*************************************************
2550 * Print one character *
2551 *************************************************/
2552
2553 /* Print a single character either literally, or as a hex escape, and count how
2554 many printed characters are used.
2555
2556 Arguments:
2557 c the character
2558 utf TRUE in UTF mode
2559 f the FILE to print to, or NULL just to count characters
2560
2561 Returns: number of characters written
2562 */
2563
2564 static int
2565 pchar(uint32_t c, BOOL utf, FILE *f)
2566 {
2567 int n = 0;
2568 char tempbuffer[16];
2569
2570 if (PRINTOK(c))
2571 {
2572 if (f != NULL) fprintf(f, "%c", c);
2573 return 1;
2574 }
2575
2576 if (c < 0x100)
2577 {
2578 if (utf)
2579 {
2580 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2581 return 6;
2582 }
2583 else
2584 {
2585 if (f != NULL) fprintf(f, "\\x%02x", c);
2586 return 4;
2587 }
2588 }
2589
2590 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2591 else n = sprintf(tempbuffer, "\\x{%02x}", c);
2592
2593 return n >= 0 ? n : 0;
2594 }
2595
2596
2597
2598 #ifdef SUPPORT_PCRE2_16
2599 /*************************************************
2600 * Find length of 0-terminated 16-bit string *
2601 *************************************************/
2602
2603 static size_t strlen16(PCRE2_SPTR16 p)
2604 {
2605 PCRE2_SPTR16 pp = p;
2606 while (*pp != 0) pp++;
2607 return (int)(pp - p);
2608 }
2609 #endif /* SUPPORT_PCRE2_16 */
2610
2611
2612
2613 #ifdef SUPPORT_PCRE2_32
2614 /*************************************************
2615 * Find length of 0-terminated 32-bit string *
2616 *************************************************/
2617
2618 static size_t strlen32(PCRE2_SPTR32 p)
2619 {
2620 PCRE2_SPTR32 pp = p;
2621 while (*pp != 0) pp++;
2622 return (int)(pp - p);
2623 }
2624 #endif /* SUPPORT_PCRE2_32 */
2625
2626
2627 #ifdef SUPPORT_PCRE2_8
2628 /*************************************************
2629 * Print 8-bit character string *
2630 *************************************************/
2631
2632 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2633 For printing *MARK strings, a negative length is given. If handed a NULL file,
2634 just counts chars without printing (because pchar() does that). */
2635
2636 static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
2637 {
2638 uint32_t c = 0;
2639 int yield = 0;
2640
2641 if (length < 0) length = p[-1];
2642 while (length-- > 0)
2643 {
2644 if (utf)
2645 {
2646 int rc = utf82ord(p, &c);
2647 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2648 {
2649 length -= rc - 1;
2650 p += rc;
2651 yield += pchar(c, utf, f);
2652 continue;
2653 }
2654 }
2655 c = *p++;
2656 yield += pchar(c, utf, f);
2657 }
2658
2659 return yield;
2660 }
2661 #endif
2662
2663
2664 #ifdef SUPPORT_PCRE2_16
2665 /*************************************************
2666 * Print 16-bit character string *
2667 *************************************************/
2668
2669 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2670 For printing *MARK strings, a negative length is given. If handed a NULL file,
2671 just counts chars without printing. */
2672
2673 static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
2674 {
2675 int yield = 0;
2676 if (length < 0) length = p[-1];
2677 while (length-- > 0)
2678 {
2679 uint32_t c = *p++ & 0xffff;
2680 if (utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2681 {
2682 int d = *p & 0xffff;
2683 if (d >= 0xDC00 && d <= 0xDFFF)
2684 {
2685 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2686 length--;
2687 p++;
2688 }
2689 }
2690 yield += pchar(c, utf, f);
2691 }
2692 return yield;
2693 }
2694 #endif /* SUPPORT_PCRE2_16 */
2695
2696
2697
2698 #ifdef SUPPORT_PCRE2_32
2699 /*************************************************
2700 * Print 32-bit character string *
2701 *************************************************/
2702
2703 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2704 For printing *MARK strings, a negative length is given. If handed a NULL file,
2705 just counts chars without printing. */
2706
2707 static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
2708 {
2709 int yield = 0;
2710 (void)(utf); /* Avoid compiler warning */
2711 if (length < 0) length = p[-1];
2712 while (length-- > 0)
2713 {
2714 uint32_t c = *p++;
2715 yield += pchar(c, utf, f);
2716 }
2717 return yield;
2718 }
2719 #endif /* SUPPORT_PCRE2_32 */
2720
2721
2722
2723
2724 #ifdef SUPPORT_PCRE2_8
2725 /*************************************************
2726 * Convert character value to UTF-8 *
2727 *************************************************/
2728
2729 /* This function takes an integer value in the range 0 - 0x7fffffff
2730 and encodes it as a UTF-8 character in 1 to 6 bytes.
2731
2732 Arguments:
2733 cvalue the character value
2734 utf8bytes pointer to buffer for result - at least 6 bytes long
2735
2736 Returns: number of bytes placed in the buffer, or -1 if cvalue > 0x7fffffff
2737 */
2738
2739 static int
2740 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
2741 {
2742 int i, j;
2743 if (cvalue > 0x7fffffffu)
2744 return -1;
2745 for (i = 0; i < utf8_table1_size; i++)
2746 if (cvalue <= (uint32_t)utf8_table1[i]) break;
2747 utf8bytes += i;
2748 for (j = i; j > 0; j--)
2749 {
2750 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
2751 cvalue >>= 6;
2752 }
2753 *utf8bytes = utf8_table2[i] | cvalue;
2754 return i + 1;
2755 }
2756 #endif /* SUPPORT_PCRE2_8 */
2757
2758
2759
2760 #ifdef SUPPORT_PCRE2_16
2761 /*************************************************
2762 * Convert string to 16-bit *
2763 *************************************************/
2764
2765 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
2766 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
2767 code values from 0 to 0x7fffffff. However, values greater than the later UTF
2768 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
2769 UTF-8 if the utf8_input modifier is set, but an error is generated for values
2770 greater than 0xffff.
2771
2772 If all the input bytes are ASCII, the space needed for a 16-bit string is
2773 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
2774 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
2775 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
2776 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
2777 save repeated re-sizing.
2778
2779 Note that this function does not object to surrogate values. This is
2780 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
2781 for the purpose of testing that they are correctly faulted.
2782
2783 Arguments:
2784 p points to a byte string
2785 utf true in UTF mode
2786 lenptr points to number of bytes in the string (excluding trailing zero)
2787
2788 Returns: 0 on success, with the length updated to the number of 16-bit
2789 data items used (excluding the trailing zero)
2790 OR -1 if a UTF-8 string is malformed
2791 OR -2 if a value > 0x10ffff is encountered in UTF mode
2792 OR -3 if a value > 0xffff is encountered when not in UTF mode
2793 */
2794
2795 static PCRE2_SIZE
2796 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
2797 {
2798 uint16_t *pp;
2799 PCRE2_SIZE len = *lenptr;
2800
2801 if (pbuffer16_size < 2*len + 2)
2802 {
2803 if (pbuffer16 != NULL) free(pbuffer16);
2804 pbuffer16_size = 2*len + 2;
2805 if (pbuffer16_size < 256) pbuffer16_size = 256;
2806 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
2807 if (pbuffer16 == NULL)
2808 {
2809 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n",
2810 (unsigned long int)pbuffer16_size);
2811 exit(1);
2812 }
2813 }
2814
2815 pp = pbuffer16;
2816 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
2817 {
2818 for (; len > 0; len--) *pp++ = *p++;
2819 }
2820 else while (len > 0)
2821 {
2822 uint32_t c;
2823 int chlen = utf82ord(p, &c);
2824 if (chlen <= 0) return -1;
2825 if (!utf && c > 0xffff) return -3;
2826 if (c > 0x10ffff) return -2;
2827 p += chlen;
2828 len -= chlen;
2829 if (c < 0x10000) *pp++ = c; else
2830 {
2831 c -= 0x10000;
2832 *pp++ = 0xD800 | (c >> 10);
2833 *pp++ = 0xDC00 | (c & 0x3ff);
2834 }
2835 }
2836
2837 *pp = 0;
2838 *lenptr = pp - pbuffer16;
2839 return 0;
2840 }
2841 #endif
2842
2843
2844
2845 #ifdef SUPPORT_PCRE2_32
2846 /*************************************************
2847 * Convert string to 32-bit *
2848 *************************************************/
2849
2850 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
2851 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
2852 code values from 0 to 0x7fffffff. However, values greater than the later UTF
2853 limit of 0x10ffff cause an error.
2854
2855 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
2856 is set, and no limit is imposed. There is special interpretation of the 0xff
2857 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
2858 next character to be set. This provides a way of generating 32-bit characters
2859 greater than 0x7fffffff.
2860
2861 If all the input bytes are ASCII, the space needed for a 32-bit string is
2862 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
2863 string is no more than four times, because the number of characters must be
2864 less than the number of bytes. The result is always left in pbuffer32. Impose a
2865 minimum size to save repeated re-sizing.
2866
2867 Note that this function does not object to surrogate values. This is
2868 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
2869 for the purpose of testing that they are correctly faulted.
2870
2871 Arguments:
2872 p points to a byte string
2873 utf true in UTF mode
2874 lenptr points to number of bytes in the string (excluding trailing zero)
2875
2876 Returns: 0 on success, with the length updated to the number of 32-bit
2877 data items used (excluding the trailing zero)
2878 OR -1 if a UTF-8 string is malformed
2879 OR -2 if a value > 0x10ffff is encountered in UTF mode
2880 */
2881
2882 static PCRE2_SIZE
2883 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
2884 {
2885 uint32_t *pp;
2886 PCRE2_SIZE len = *lenptr;
2887
2888 if (pbuffer32_size < 4*len + 4)
2889 {
2890 if (pbuffer32 != NULL) free(pbuffer32);
2891 pbuffer32_size = 4*len + 4;
2892 if (pbuffer32_size < 256) pbuffer32_size = 256;
2893 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
2894 if (pbuffer32 == NULL)
2895 {
2896 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n",
2897 (unsigned long int)pbuffer32_size);
2898 exit(1);
2899 }
2900 }
2901
2902 pp = pbuffer32;
2903
2904 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
2905 {
2906 for (; len > 0; len--) *pp++ = *p++;
2907 }
2908
2909 else while (len > 0)
2910 {
2911 int chlen;
2912 uint32_t c;
2913 uint32_t topbit = 0;
2914 if (!utf && *p == 0xff && len > 1)
2915 {
2916 topbit = 0x80000000u;
2917 p++;
2918 len--;
2919 }
2920 chlen = utf82ord(p, &c);
2921 if (chlen <= 0) return -1;
2922 if (utf && c > 0x10ffff) return -2;
2923 p += chlen;
2924 len -= chlen;
2925 *pp++ = c | topbit;
2926 }
2927
2928 *pp = 0;
2929 *lenptr = pp - pbuffer32;
2930 return 0;
2931 }
2932 #endif /* SUPPORT_PCRE2_32 */
2933
2934
2935
2936 /*************************************************
2937 * Move back by so many characters *
2938 *************************************************/
2939
2940 /* Given a code unit offset in a subject string, move backwards by a number of
2941 characters, and return the resulting offset.
2942
2943 Arguments:
2944 subject pointer to the string
2945 offset start offset
2946 count count to move back by
2947 utf TRUE if in UTF mode
2948
2949 Returns: a possibly changed offset
2950 */
2951
2952 static PCRE2_SIZE
2953 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
2954 {
2955 if (!utf || test_mode == PCRE32_MODE)
2956 return (count >= offset)? 0 : (offset - count);
2957
2958 else if (test_mode == PCRE8_MODE)
2959 {
2960 PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
2961 for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
2962 {
2963 pp--;
2964 while ((*pp & 0xc0) == 0x80) pp--;
2965 }
2966 return pp - (PCRE2_SPTR8)subject;
2967 }
2968
2969 else /* 16-bit mode */
2970 {
2971 PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
2972 for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
2973 {
2974 pp--;
2975 if ((*pp & 0xfc00) == 0xdc00) pp--;
2976 }
2977 return pp - (PCRE2_SPTR16)subject;
2978 }
2979 }
2980
2981
2982
2983 /*************************************************
2984 * Expand input buffers *
2985 *************************************************/
2986
2987 /* This function doubles the size of the input buffer and the buffer for
2988 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
2989 the new ones.
2990
2991 Arguments: none
2992 Returns: nothing (aborts if malloc() fails)
2993 */
2994
2995 static void
2996 expand_input_buffers(void)
2997 {
2998 int new_pbuffer8_size = 2*pbuffer8_size;
2999 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3000 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3001
3002 if (new_buffer == NULL || new_pbuffer8 == NULL)
3003 {
3004 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3005 exit(1);
3006 }
3007
3008 memcpy(new_buffer, buffer, pbuffer8_size);
3009 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3010
3011 pbuffer8_size = new_pbuffer8_size;
3012
3013 free(buffer);
3014 free(pbuffer8);
3015
3016 buffer = new_buffer;
3017 pbuffer8 = new_pbuffer8;
3018 }
3019
3020
3021
3022 /*************************************************
3023 * Read or extend an input line *
3024 *************************************************/
3025
3026 /* Input lines are read into buffer, but both patterns and data lines can be
3027 continued over multiple input lines. In addition, if the buffer fills up, we
3028 want to automatically expand it so as to be able to handle extremely large
3029 lines that are needed for certain stress tests, although this is less likely
3030 now that there are repetition features for both patterns and data. When the
3031 input buffer is expanded, the pattern buffer (pbuffer8) must also be expanded
3032 likewise, and its contents, which are a copy of the input for callouts, must
3033 be preserved (for when expansion happens for a data line). This is not the most
3034 optimal way of handling this, but hey, this is just a test program!
3035
3036 Arguments:
3037 f the file to read
3038 start where in buffer to start (this *must* be within buffer)
3039 prompt for stdin or readline()
3040
3041 Returns: pointer to the start of new data
3042 could be a copy of start, or could be moved
3043 NULL if no data read and EOF reached
3044 */
3045
3046 static uint8_t *
3047 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3048 {
3049 uint8_t *here = start;
3050
3051 for (;;)
3052 {
3053 size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3054
3055 if (rlen > 1000)
3056 {
3057 size_t dlen;
3058
3059 /* If libreadline or libedit support is required, use readline() to read a
3060 line if the input is a terminal. Note that readline() removes the trailing
3061 newline, so we must put it back again, to be compatible with fgets(). */
3062
3063 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3064 if (INTERACTIVE(f))
3065 {
3066 size_t len;
3067 char *s = readline(prompt);
3068 if (s == NULL) return (here == start)? NULL : start;
3069 len = strlen(s);
3070 if (len > 0) add_history(s);
3071 if (len > rlen - 1) len = rlen - 1;
3072 memcpy(here, s, len);
3073 here[len] = '\n';
3074 here[len+1] = 0;
3075 free(s);
3076 }
3077 else
3078 #endif
3079
3080 /* Read the next line by normal means, prompting if the file is a tty. */
3081
3082 {
3083 if (INTERACTIVE(f)) printf("%s", prompt);
3084 if (fgets((char *)here, rlen, f) == NULL)
3085 return (here == start)? NULL : start;
3086 }
3087
3088 dlen = strlen((char *)here);
3089 here += dlen;
3090
3091 /* Check for end of line reached. Take care not to read data from before
3092 start (dlen will be zero for a file starting with a binary zero). */
3093
3094 if (here > start && here[-1] == '\n') return start;
3095
3096 /* If we have not read a newline when reading a file, we have either filled
3097 the buffer or reached the end of the file. We can detect the former by
3098 checking that the string fills the buffer, and the latter by feof(). If
3099 neither of these is true, it means we read a binary zero which has caused
3100 strlen() to give a short length. This is a hard error because pcre2test
3101 expects to work with C strings. */
3102
3103 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3104 {
3105 fprintf(outfile, "** Binary zero encountered in input\n");
3106 fprintf(outfile, "** pcre2test run abandoned\n");
3107 exit(1);
3108 }
3109 }
3110
3111 else
3112 {
3113 size_t start_offset = start - buffer;
3114 size_t here_offset = here - buffer;
3115 expand_input_buffers();
3116 start = buffer + start_offset;
3117 here = buffer + here_offset;
3118 }
3119 }
3120
3121 /* Control never gets here */
3122 }
3123
3124
3125
3126 /*************************************************
3127 * Case-independent strncmp() function *
3128 *************************************************/
3129
3130 /*
3131 Arguments:
3132 s first string
3133 t second string
3134 n number of characters to compare
3135
3136 Returns: < 0, = 0, or > 0, according to the comparison
3137 */
3138
static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
/* Compare at most n characters, ignoring case. Like strncmp(), stop when both
strings end, so that a count larger than the strings never causes a read past
their terminating zeros. A count that is zero or negative compares nothing and
yields 0. */

for (; n > 0; n--)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;              /* Both strings ended together */
  }
return 0;
}
3149
3150
3151
3152 /*************************************************
3153 * Scan the main modifier list *
3154 *************************************************/
3155
3156 /* This function searches the modifier list for a long modifier name.
3157
3158 Argument:
3159 p start of the name
3160 len length of the name
3161
3162 Returns: an index in the modifier list, or -1 on failure
3163 */
3164
3165 static int
3166 scan_modifiers(const uint8_t *p, unsigned int len)
3167 {
3168 int bot = 0;
3169 int top = MODLISTCOUNT;
3170
3171 while (top > bot)
3172 {
3173 int mid = (bot + top)/2;
3174 unsigned int mlen = strlen(modlist[mid].name);
3175 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3176 if (c == 0)
3177 {
3178 if (len == mlen) return mid;
3179 c = (int)len - (int)mlen;
3180 }
3181 if (c > 0) bot = mid + 1; else top = mid;
3182 }
3183
3184 return -1;
3185
3186 }
3187
3188
3189
3190 /*************************************************
3191 * Check a modifier and find its field *
3192 *************************************************/
3193
3194 /* This function is called when a modifier has been identified. We check that
3195 it is allowed here and find the field that is to be changed.
3196
3197 Arguments:
3198 m the modifier list entry
3199 ctx CTX_PAT => pattern context
3200 CTX_POPPAT => pattern context for popped pattern
3201 CTX_DEFPAT => default pattern context
3202 CTX_DAT => data context
3203 CTX_DEFDAT => default data context
3204 pctl point to pattern control block
3205 dctl point to data control block
3206 c a single character or 0
3207
3208 Returns: a field pointer or NULL
3209 */
3210
3211 static void *
3212 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3213 {
3214 void *field = NULL;
3215 PCRE2_SIZE offset = m->offset;
3216
3217 if (restrict_for_perl_test) switch(m->which)
3218 {
3219 case MOD_PNDP:
3220 case MOD_PATP:
3221 case MOD_PDP:
3222 break;
3223
3224 default:
3225 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3226 m->name);
3227 return NULL;
3228 }
3229
3230 switch (m->which)
3231 {
3232 case MOD_CTC: /* Compile context modifier */
3233 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3234 else if (ctx == CTX_PAT) field = PTR(pat_context);
3235 break;
3236
3237 case MOD_CTM: /* Match context modifier */
3238 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3239 else if (ctx == CTX_DAT) field = PTR(dat_context);
3240 break;
3241
3242 case MOD_DAT: /* Data line modifier */
3243 if (dctl != NULL) field = dctl;
3244 break;
3245
3246 case MOD_PAT: /* Pattern modifier */
3247 case MOD_PATP: /* Allowed for Perl test */
3248 if (pctl != NULL) field = pctl;
3249 break;
3250
3251 case MOD_PD: /* Pattern or data line modifier */
3252 case MOD_PDP: /* Ditto, allowed for Perl test */
3253 case MOD_PND: /* Ditto, but not default pattern */
3254 case MOD_PNDP: /* Ditto, allowed for Perl test */
3255 if (dctl != NULL) field = dctl;
3256 else if (pctl != NULL && (m->which == MOD_PD || ctx != CTX_DEFPAT))
3257 field = pctl;
3258 break;
3259 }
3260
3261 if (field == NULL)
3262 {
3263 if (c == 0)
3264 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3265 else
3266 fprintf(outfile, "** /%c is not valid here\n", c);
3267 return NULL;
3268 }
3269
3270 return (char *)field + offset;
3271 }
3272
3273
3274
3275 /*************************************************
3276 * Decode a modifier list *
3277 *************************************************/
3278
3279 /* A pointer to a control block is NULL when called in cases when that block is
3280 not relevant. They are never all relevant in one call. At least one of patctl
3281 and datctl is NULL. The second argument specifies which context to use for
3282 modifiers that apply to contexts.
3283
3284 Arguments:
3285 p point to modifier string
3286 ctx CTX_PAT => pattern context
3287 CTX_POPPAT => pattern context for popped pattern
3288 CTX_DEFPAT => default pattern context
3289 CTX_DAT => data context
3290 CTX_DEFDAT => default data context
3291 pctl point to pattern control block
3292 dctl point to data control block
3293
3294 Returns: TRUE if successful decode, FALSE otherwise
3295 */
3296
3297 static BOOL
3298 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3299 {
3300 uint8_t *ep, *pp;
3301 long li;
3302 unsigned long uli;
3303 BOOL first = TRUE;
3304
3305 for (;;)
3306 {
3307 void *field;
3308 modstruct *m;
3309 BOOL off = FALSE;
3310 unsigned int i, len;
3311 int index;
3312 char *endptr;
3313
3314 /* Skip white space and commas. */
3315
3316 while (isspace(*p) || *p == ',') p++;
3317 if (*p == 0) break;
3318
3319 /* Find the end of the item; lose trailing whitespace at end of line. */
3320
3321 for (ep = p; *ep != 0 && *ep != ','; ep++);
3322 if (*ep == 0)
3323 {
3324 while (ep > p && isspace(ep[-1])) ep--;
3325 *ep = 0;
3326 }
3327
3328 /* Remember if the first character is '-'. */
3329
3330 if (*p == '-')
3331 {
3332 off = TRUE;
3333 p++;
3334 }
3335
3336 /* Find the length of a full-length modifier name, and scan for it. */
3337
3338 pp = p;
3339 while (pp < ep && *pp != '=') pp++;
3340 index = scan_modifiers(p, pp - p);
3341
3342 /* If the first modifier is unrecognized, try to interpret it as a sequence
3343 of single-character abbreviated modifiers. None of these modifiers have any
3344 associated data. They just set options or control bits. */
3345
3346 if (index < 0)
3347 {
3348 uint32_t cc;
3349 uint8_t *mp = p;
3350
3351 if (!first)
3352 {
3353 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3354 if (ep - p == 1)
3355 fprintf(outfile, "** Single-character modifiers must come first\n");
3356 return FALSE;
3357 }
3358
3359 for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3360 {
3361 for (i = 0; i < C1MODLISTCOUNT; i++)
3362 if (cc == c1modlist[i].onechar) break;
3363
3364 if (i >= C1MODLISTCOUNT)
3365 {
3366 fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3367 *p, (int)(ep-mp), mp);
3368 return FALSE;
3369 }
3370
3371 if (c1modlist[i].index >= 0)
3372 {
3373 index = c1modlist[i].index;
3374 }
3375
3376 else
3377 {
3378 index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3379 strlen(c1modlist[i].fullname));
3380 if (index < 0)
3381 {
3382 fprintf(outfile, "** Internal error: single-character equivalent "
3383 "modifier '%s' not found\n", c1modlist[i].fullname);
3384 return FALSE;
3385 }
3386 c1modlist[i].index = index; /* Cache for next time */
3387 }
3388
3389 field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3390 if (field == NULL) return FALSE;
3391 *((uint32_t *)field) |= modlist[index].value;
3392 }
3393
3394 continue; /* With tne next (fullname) modifier */
3395 }
3396
3397 /* We have a match on a full-name modifier. Check for the existence of data
3398 when needed. */
3399
3400 m = modlist + index; /* Save typing */
3401 if (m->type != MOD_CTL && m->type != MOD_OPT &&
3402 (m->type != MOD_IND || *pp == '='))
3403 {
3404 if (*pp++ != '=')
3405 {
3406 fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3407 return FALSE;
3408 }
3409 if (off)
3410 {
3411 fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3412 return FALSE;
3413 }
3414 }
3415
3416 /* These on/off types have no data. */
3417
3418 else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3419 {
3420 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3421 return FALSE;
3422 }
3423
3424 /* Set the data length for those types that have data. Then find the field
3425 that is to be set. If check_modifier() returns NULL, it has already output an
3426 error message. */
3427
3428 len = ep - pp;
3429 field = check_modifier(m, ctx, pctl, dctl, 0);
3430 if (field == NULL) return FALSE;
3431
3432 /* Process according to data type. */
3433
3434 switch (m->type)
3435 {
3436 case MOD_CTL:
3437 case MOD_OPT:
3438 if (off) *((uint32_t *)field) &= ~m->value;
3439 else *((uint32_t *)field) |= m->value;
3440 break;
3441
3442 case MOD_BSR:
3443 if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3444 {
3445 #ifdef BSR_ANYCRLF
3446 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3447 #else
3448 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3449 #endif
3450 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL_BSR_SET;
3451 else dctl->control2 &= ~CTL_BSR_SET;
3452 }
3453 else
3454 {
3455 if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3456 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3457 else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3458 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3459 else goto INVALID_VALUE;
3460 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL_BSR_SET;
3461 else dctl->control2 |= CTL_BSR_SET;
3462 }
3463 pp = ep;
3464 break;
3465
3466 case MOD_IN2: /* One or two unsigned integers */
3467 if (!isdigit(*pp)) goto INVALID_VALUE;
3468 uli = strtoul((const char *)pp, &endptr, 10);
3469 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3470 ((uint32_t *)field)[0] = (uint32_t)uli;
3471 if (*endptr == ':')
3472 {
3473 uli = strtoul((const char *)endptr+1, &endptr, 10);
3474 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3475 ((uint32_t *)field)[1] = (uint32_t)uli;
3476 }
3477 else ((uint32_t *)field)[1] = 0;
3478 pp = (uint8_t *)endptr;
3479 break;
3480
3481 /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3482 less than ULONG_MAX. So first test for overflowing the long int, and then
3483 test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3484
3485 case MOD_SIZ: /* PCRE2_SIZE value */
3486 if (!isdigit(*pp)) goto INVALID_VALUE;
3487 uli = strtoul((const char *)pp, &endptr, 10);
3488 if (uli == ULONG_MAX) goto INVALID_VALUE;
3489 #if ULONG_MAX > PCRE2_SIZE_MAX
3490 if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3491 #endif
3492 *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3493 pp = (uint8_t *)endptr;
3494 break;
3495
3496 case MOD_IND: /* Unsigned integer with default */
3497 if (len == 0)
3498 {
3499 *((uint32_t *)field) = (uint32_t)(m->value);
3500 break;
3501 }
3502 /* Fall through */
3503
3504 case MOD_INT: /* Unsigned integer */
3505 if (!isdigit(*pp)) goto INVALID_VALUE;
3506 uli = strtoul((const char *)pp, &endptr, 10);
3507 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3508 *((uint32_t *)field) = (uint32_t)uli;
3509 pp = (uint8_t *)endptr;
3510 break;
3511
3512 case MOD_INS: /* Signed integer */
3513 if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3514 li = strtol((const char *)pp, &endptr, 10);
3515 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3516 *((int32_t *)field) = (int32_t)li;
3517 pp = (uint8_t *)endptr;
3518 break;
3519
3520 case MOD_NL:
3521 for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3522 if (len == strlen(newlines[i]) &&
3523 strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3524 if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3525 if (i == 0)
3526 {
3527 *((uint16_t *)field) = NEWLINE_DEFAULT;
3528 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL_NL_SET;
3529 else dctl->control2 &= ~CTL_NL_SET;
3530 }
3531 else
3532 {
3533 *((uint16_t *)field) = i;
3534 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL_NL_SET;
3535 else dctl->control2 |= CTL_NL_SET;
3536 }
3537 pp = ep;
3538 break;
3539
3540 case MOD_NN: /* Name or (signed) number; may be several */
3541 if (isdigit(*pp) || *pp == '-')
3542 {
3543 int ct = MAXCPYGET - 1;
3544 int32_t value;
3545 li = strtol((const char *)pp, &endptr, 10);
3546 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3547 value = (int32_t)li;
3548 field = (char *)field - m->offset + m->value; /* Adjust field ptr */
3549 if (value >= 0) /* Add new number */
3550 {
3551 while (*((int32_t *)field) >= 0 && ct-- > 0) /* Skip previous */
3552 field = (char *)field + sizeof(int32_t);
3553 if (ct <= 0)
3554 {
3555 fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3556 return FALSE;
3557 }
3558 }
3559 *((int32_t *)field) = value;
3560 if (ct > 0) ((int32_t *)field)[1] = -1;
3561 pp = (uint8_t *)endptr;
3562 }
3563
3564 /* Multiple strings are put end to end. */
3565
3566 else
3567 {
3568 char *nn = (char *)field;
3569 if (len > 0) /* Add new name */
3570 {
3571 if (len > MAX_NAME_SIZE)
3572 {
3573 fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
3574 return FALSE;
3575 }
3576 while (*nn != 0) nn += strlen(nn) + 1;
3577 if (nn + len + 2 - (char *)field > LENCPYGET)
3578 {
3579 fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
3580 m->name);
3581 return FALSE;
3582 }
3583 memcpy(nn, pp, len);
3584 }
3585 nn[len] = 0 ;
3586 nn[len+1] = 0;
3587 pp = ep;
3588 }
3589 break;
3590
3591 case MOD_STR:
3592 if (len + 1 > m->value)
3593 {
3594 fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
3595 m->name, m->value - 1);
3596 return FALSE;
3597 }
3598 memcpy(field, pp, len);
3599 ((uint8_t *)field)[len] = 0;
3600 pp = ep;
3601 break;
3602 }
3603
3604 if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3605 {
3606 fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
3607 return FALSE;
3608 }
3609
3610 p = pp;
3611 first = FALSE;
3612
3613 if (ctx == CTX_POPPAT &&
3614 (pctl->options != 0 ||
3615 pctl->tables_id != 0 ||
3616 pctl->locale[0] != 0 ||
3617 (pctl->control & NOTPOP_CONTROLS) != 0))
3618 {
3619 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3620 return FALSE;
3621 }
3622 }
3623
3624 return TRUE;
3625
3626 INVALID_VALUE:
3627 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
3628 return FALSE;
3629 }
3630
3631
3632 /*************************************************
3633 * Get info from a pattern *
3634 *************************************************/
3635
3636 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
3637 pattern.
3638
3639 Arguments:
3640 what code for the required information
3641 where where to put the answer
3642 unsetok PCRE2_ERROR_UNSET is an "expected" result
3643
3644 Returns: the return from pcre2_pattern_info()
3645 */
3646
3647 static int
3648 pattern_info(int what, void *where, BOOL unsetok)
3649 {
3650 int rc;
3651 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
3652 if (rc >= 0) return 0;
3653 if (rc != PCRE2_ERROR_UNSET || !unsetok)
3654 {
3655 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
3656 what);
3657 if (rc == PCRE2_ERROR_BADMODE)
3658 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3659 "%d-bit mode\n", test_mode,
3660 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
3661 }
3662 return rc;
3663 }
3664
3665
3666
3667 #ifdef SUPPORT_PCRE2_8
3668 /*************************************************
3669 * Show something in a list *
3670 *************************************************/
3671
3672 /* This function just helps to keep the code that uses it tidier. It's used for
3673 various lists of things where there needs to be introductory text before the
3674 first item. As these calls are all in the POSIX-support code, they happen only
3675 when 8-bit mode is supported. */
3676
3677 static void
3678 prmsg(const char **msg, const char *s)
3679 {
3680 fprintf(outfile, "%s %s", *msg, s);
3681 *msg = "";
3682 }
3683 #endif /* SUPPORT_PCRE2_8 */
3684
3685
3686
3687 /*************************************************
3688 * Show control bits *
3689 *************************************************/
3690
3691 /* Called for mutually exclusive controls and for unsupported POSIX controls.
3692 Because the bits are unique, this can be used for both pattern and data control
3693 words.
3694
3695 Arguments:
3696 controls control bits
3697 controls2 more control bits
3698 before text to print before
3699
3700 Returns: nothing
3701 */
3702
3703 static void
3704 show_controls(uint32_t controls, uint32_t controls2, const char *before)
3705 {
3706 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
3707 before,
3708 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
3709 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
3710 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
3711 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
3712 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
3713 ((controls & CTL_BINCODE) != 0)? " bincode" : "",
3714 ((controls2 & CTL_BSR_SET) != 0)? " bsr" : "",
3715 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
3716 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
3717 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
3718 ((controls & CTL_DFA) != 0)? " dfa" : "",
3719 ((controls & CTL_EXPAND) != 0)? " expand" : "",
3720 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
3721 ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
3722 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
3723 ((controls & CTL_GETALL) != 0)? " getall" : "",
3724 ((controls & CTL_GLOBAL) != 0)? " global" : "",
3725 ((controls & CTL_HEXPAT) != 0)? " hex" : "",
3726 ((controls & CTL_INFO) != 0)? " info" : "",
3727 ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
3728 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
3729 ((controls & CTL_MARK) != 0)? " mark" : "",
3730 ((controls & CTL_MEMORY) != 0)? " memory" : "",
3731 ((controls2 & CTL_NL_SET) != 0)? " newline" : "",
3732 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
3733 ((controls & CTL_POSIX) != 0)? " posix" : "",
3734 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
3735 ((controls & CTL_PUSH) != 0)? " push" : "",
3736 ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
3737 ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
3738 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
3739 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
3740 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
3741 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
3742 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
3743 ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
3744 ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
3745 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
3746 }
3747
3748
3749
3750 /*************************************************
3751 * Show compile options *
3752 *************************************************/
3753
3754 /* Called from show_pattern_info() and for unsupported POSIX options.
3755
3756 Arguments:
3757 options an options word
3758 before text to print before
3759 after text to print after
3760
3761 Returns: nothing
3762 */
3763
3764 static void
3765 show_compile_options(uint32_t options, const char *before, const char *after)
3766 {
3767 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
3768 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
3769 before,
3770 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
3771 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
3772 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
3773 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
3774 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
3775 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
3776 ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
3777 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3778 ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
3779 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
3780 ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
3781 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
3782 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
3783 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
3784 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
3785 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
3786 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
3787 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3788 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
3789 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
3790 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
3791 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3792 ((options & PCRE2_UCP) != 0)? " ucp" : "",
3793 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
3794 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
3795 ((options & PCRE2_UTF) != 0)? " utf" : "",
3796 after);
3797 }
3798
3799
3800
3801 #ifdef SUPPORT_PCRE2_8
3802 /*************************************************
3803 * Show match options *
3804 *************************************************/
3805
3806 /* Called for unsupported POSIX options. */
3807
3808 static void
3809 show_match_options(uint32_t options)
3810 {
3811 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s",
3812 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
3813 ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
3814 ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
3815 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
3816 ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
3817 ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
3818 ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
3819 ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
3820 ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
3821 ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
3822 }
3823 #endif /* SUPPORT_PCRE2_8 */
3824
3825
3826
3827 /*************************************************
3828 * Show memory usage info for a pattern *
3829 *************************************************/
3830
3831 static void
3832 show_memory_info(void)
3833 {
3834 uint32_t name_count, name_entry_size;
3835 size_t size, cblock_size;
3836
3837 /* One of the test_mode values will always be true, but to stop a compiler
3838 warning we must initialize cblock_size. */
3839
3840 cblock_size = 0;
3841 #ifdef SUPPORT_PCRE2_8
3842 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
3843 #endif
3844 #ifdef SUPPORT_PCRE2_16
3845 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
3846 #endif
3847 #ifdef SUPPORT_PCRE2_32
3848 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
3849 #endif
3850
3851 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
3852 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
3853 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
3854 fprintf(outfile, "Memory allocation (code space): %d\n",
3855 (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
3856 if (pat_patctl.jit != 0)
3857 {
3858 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
3859 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
3860 }
3861 }
3862
3863
3864
3865 /*************************************************
3866 * Show frame size info for a pattern *
3867 *************************************************/
3868
3869 static void
3870 show_framesize(void)
3871 {
3872 size_t frame_size;
3873 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
3874 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
3875 }
3876
3877
3878
3879 /*************************************************
3880 * Callback function for callout enumeration *
3881 *************************************************/
3882
3883 /* The only differences in the callout emumeration block for different code
3884 unit widths are that the pointers to the subject, the most recent MARK, and a
3885 callout argument string point to strings of the appropriate width. Casts can be
3886 used to deal with this.
3887
3888 Argument:
3889 cb pointer to enumerate block
3890 callout_data user data
3891
3892 Returns: 0
3893 */
3894
3895 static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
3896 void *callout_data)
3897 {
3898 uint32_t i;
3899 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
3900
3901 (void)callout_data; /* Not currently displayed */
3902
3903 fprintf(outfile, "Callout ");
3904 if (cb->callout_string != NULL)
3905 {
3906 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
3907 fprintf(outfile, "%c", delimiter);
3908 PCHARSV(cb->callout_string, 0,
3909 cb->callout_string_length, utf, outfile);
3910 for (i = 0; callout_start_delims[i] != 0; i++)
3911 if (delimiter == callout_start_delims[i])
3912 {
3913 delimiter = callout_end_delims[i];
3914 break;
3915 }
3916 fprintf(outfile, "%c ", delimiter);
3917 }
3918 else fprintf(outfile, "%d ", cb->callout_number);
3919
3920 fprintf(outfile, "%.*s\n",
3921 (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
3922 pbuffer8 + cb->pattern_position);
3923
3924 return 0;
3925 }
3926
3927
3928
3929 /*************************************************
3930 * Show information about a pattern *
3931 *************************************************/
3932
3933 /* This function is called after a pattern has been compiled if any of the
3934 information-requesting controls have been set.
3935
3936 Arguments: none
3937
3938 Returns: PR_OK continue processing next line
3939 PR_SKIP skip to a blank line
3940 PR_ABEND abort the pcre2test run
3941 */
3942
3943 static int
3944 show_pattern_info(void)
3945 {
3946 uint32_t compile_options, overall_options;
3947
3948 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
3949 {
3950 fprintf(outfile, "------------------------------------------------------------------\n");
3951 PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
3952 }
3953
3954 if ((pat_patctl.control & CTL_INFO) != 0)
3955 {
3956 void *nametable;
3957 uint8_t *start_bits;
3958 BOOL match_limit_set, depth_limit_set;
3959 uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
3960 hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
3961 match_limit, minlength, nameentrysize, namecount, newline_convention,
3962 depth_limit;
3963
3964 /* These info requests may return PCRE2_ERROR_UNSET. */
3965
3966 switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
3967 {
3968 case 0:
3969 match_limit_set = TRUE;
3970 break;
3971
3972 case PCRE2_ERROR_UNSET:
3973 match_limit_set = FALSE;
3974 break;
3975
3976 default:
3977 return PR_ABEND;
3978 }
3979
3980 switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
3981 {
3982 case 0:
3983 depth_limit_set = TRUE;
3984 break;
3985
3986 case PCRE2_ERROR_UNSET:
3987 depth_limit_set = FALSE;
3988 break;
3989
3990 default:
3991 return PR_ABEND;
3992 }
3993
3994 /* These info requests should always succeed. */
3995
3996 if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
3997 pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
3998 pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
3999 pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
4000 pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
4001 pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
4002 pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
4003 pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
4004 pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
4005 pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
4006 pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
4007 pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
4008 pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
4009 pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
4010 pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
4011 pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
4012 pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
4013 != 0)
4014 return PR_ABEND;
4015
4016 fprintf(outfile, "Capturing subpattern count = %d\n", capture_count);
4017
4018 if (backrefmax > 0)
4019 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4020
4021 if (maxlookbehind > 0)
4022 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4023
4024 if (match_limit_set)
4025 fprintf(outfile, "Match limit = %u\n", match_limit);
4026
4027 if (depth_limit_set)
4028 fprintf(outfile, "Depth limit = %u\n", depth_limit);
4029
4030 if (namecount > 0)
4031 {
4032 fprintf(outfile, "Named capturing subpatterns:\n");
4033 for (; namecount > 0; namecount--)
4034 {
4035 int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
4036 uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
4037 fprintf(outfile, " ");
4038 PCHARSV(nametable, imm2_size, length, FALSE, outfile);
4039 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4040 #ifdef SUPPORT_PCRE2_32
4041 if (test_mode == PCRE32_MODE)
4042 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
4043 #endif
4044 #ifdef SUPPORT_PCRE2_16
4045 if (test_mode == PCRE16_MODE)
4046 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
4047 #endif
4048 #ifdef SUPPORT_PCRE2_8
4049 if (test_mode == PCRE8_MODE)
4050 fprintf(outfile, "%3d\n", (int)(
4051 ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
4052 #endif
4053 nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
4054 }
4055 }
4056
4057 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4058 if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
4059 if (match_empty) fprintf(outfile, "May match empty string\n");
4060
4061 pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
4062 pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
4063
4064 /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
4065 cluttering up the verification output of non-UTF test files. */
4066
4067 if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
4068 {
4069 compile_options &= ~PCRE2_NEVER_UTF;
4070 overall_options &= ~PCRE2_NEVER_UTF;
4071 }
4072
4073 if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
4074 {
4075 compile_options &= ~PCRE2_NEVER_UCP;
4076 overall_options &= ~PCRE2_NEVER_UCP;
4077 }
4078
4079 if ((compile_options|overall_options) != 0)
4080 {
4081 if (compile_options == overall_options)
4082 show_compile_options(compile_options, "Options:", "\n");
4083 else
4084 {
4085 show_compile_options(compile_options, "Compile options:", "\n");
4086 show_compile_options(overall_options, "Overall options:", "\n");
4087 }
4088 }
4089
4090 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4091
4092 if ((pat_patctl.control2 & CTL_BSR_SET) != 0 ||
4093 (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4094 fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4095 "any Unicode newline" : "CR, LF, or CRLF");
4096
4097 if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4098 {
4099 switch (newline_convention)
4100 {
4101 case PCRE2_NEWLINE_CR:
4102 fprintf(outfile, "Forced newline is CR\n");
4103 break;
4104
4105 case PCRE2_NEWLINE_LF:
4106 fprintf(outfile, "Forced newline is LF\n");
4107 break;
4108
4109 case PCRE2_NEWLINE_CRLF:
4110 fprintf(outfile, "Forced newline is CRLF\n");
4111 break;
4112
4113 case PCRE2_NEWLINE_ANYCRLF:
4114 fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4115 break;
4116
4117 case PCRE2_NEWLINE_ANY:
4118 fprintf(outfile, "Forced newline is any Unicode newline\n");
4119 break;
4120
4121 default:
4122 break;
4123 }
4124 }
4125
4126 if (first_ctype == 2)
4127 {
4128 fprintf(outfile, "First code unit at start or follows newline\n");
4129 }
4130 else if (first_ctype == 1)
4131 {
4132 const char *caseless =
4133 ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4134 "" : " (caseless)";
4135 if (PRINTOK(first_cunit))
4136 fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4137 else
4138 {
4139 fprintf(outfile, "First code unit = ");
4140 pchar(first_cunit, FALSE, outfile);
4141 fprintf(outfile, "%s\n", caseless);
4142 }
4143 }
4144 else if (start_bits != NULL)
4145 {
4146 int i;
4147 int c = 24;
4148 fprintf(outfile, "Starting code units: ");
4149 for (i = 0; i < 256; i++)
4150 {
4151 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4152 {
4153 if (c > 75)
4154 {
4155 fprintf(outfile, "\n ");
4156 c = 2;
4157 }
4158 if (PRINTOK(i) && i != ' ')
4159 {
4160 fprintf(outfile, "%c ", i);
4161 c += 2;
4162 }
4163 else
4164 {
4165 fprintf(outfile, "\\x%02x ", i);
4166 c += 5;
4167 }
4168 }
4169 }
4170 fprintf(outfile, "\n");
4171 }
4172
4173 if (last_ctype != 0)
4174 {
4175 const char *caseless =
4176 ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4177 "" : " (caseless)";
4178 if (PRINTOK(last_cunit))
4179 fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4180 else
4181 {
4182 fprintf(outfile, "Last code unit = ");
4183 pchar(last_cunit, FALSE, outfile);
4184 fprintf(outfile, "%s\n", caseless);
4185 }
4186 }
4187
4188 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4189
4190 if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4191 {
4192 if (FLD(compiled_code, executable_jit) != NULL)
4193 fprintf(outfile, "JIT compilation was successful\n");
4194 else
4195 {
4196 #ifdef SUPPORT_JIT
4197 int len;
4198 fprintf(outfile, "JIT compilation was not successful");
4199 if (jitrc != 0)
4200 {
4201 fprintf(outfile, " (");
4202 PCRE2_GET_ERROR_MESSAGE(len, jitrc, pbuffer);
4203 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4204 fprintf(outfile, ")");
4205 }
4206 fprintf(outfile, "\n");
4207 #else
4208 fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4209 #endif
4210 }
4211 }
4212 }
4213
4214 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4215 {
4216 int errorcode;
4217 PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4218 if (errorcode != 0)
4219 {
4220 int len;
4221 fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4222 if (errorcode < 0)
4223 {
4224 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4225 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4226 }
4227 fprintf(outfile, "\n");
4228 return PR_SKIP;
4229 }
4230 }
4231
4232 return PR_OK;
4233 }
4234
4235
4236
4237 /*************************************************
4238 * Handle serialization error *
4239 *************************************************/
4240
4241 /* Print an error message after a serialization failure.
4242
4243 Arguments:
4244 rc the error code
4245 msg an initial message for what failed
4246
4247 Returns: nothing
4248 */
4249
4250 static void
4251 serial_error(int rc, const char *msg)
4252 {
4253 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4254 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
4255 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
4256 fprintf(outfile, "\n");
4257 }
4258
4259
4260
4261 /*************************************************
4262 * Open file for save/load commands *
4263 *************************************************/
4264
4265 /* This function decodes the file name and opens the file.
4266
4267 Arguments:
4268 buffptr point after the #command
4269 mode open mode
4270 fptr points to the FILE variable
4271
4272 Returns: PR_OK or PR_ABEND
4273 */
4274
4275 static int
4276 open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
4277 {
4278 char *endf;
4279 char *filename = (char *)buffptr;
4280 while (isspace(*filename)) filename++;
4281 endf = filename + strlen8(filename);
4282 while (endf > filename && isspace(endf[-1])) endf--;
4283
4284 if (endf == filename)
4285 {
4286 fprintf(outfile, "** File name expected after #save\n");
4287 return PR_ABEND;
4288 }
4289
4290 *endf = 0;
4291 *fptr = fopen((const char *)filename, mode);
4292 if (*fptr == NULL)
4293 {
4294 fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4295 return PR_ABEND;
4296 }
4297
4298 return PR_OK;
4299 }
4300
4301
4302
4303 /*************************************************
4304 * Process command line *
4305 *************************************************/
4306
4307 /* This function is called for lines beginning with # and a character that is
4308 not ! or whitespace, when encountered between tests, which means that there is
4309 no compiled pattern (compiled_code is NULL). The line is in buffer.
4310
4311 Arguments: none
4312
4313 Returns: PR_OK continue processing next line
4314 PR_SKIP skip to a blank line
4315 PR_ABEND abort the pcre2test run
4316 */
4317
4318 static int
4319 process_command(void)
4320 {
4321 FILE *f;
4322 PCRE2_SIZE serial_size;
4323 size_t i;
4324 int rc, cmd, cmdlen;
4325 uint16_t first_listed_newline;
4326 const char *cmdname;
4327 uint8_t *argptr, *serial;
4328
4329 if (restrict_for_perl_test)
4330 {
4331 fprintf(outfile, "** #-commands are not allowed after #perltest\n");
4332 return PR_ABEND;
4333 }
4334
4335 cmd = CMD_UNKNOWN;
4336 cmdlen = 0;
4337
4338 for (i = 0; i < cmdlistcount; i++)
4339 {
4340 cmdname = cmdlist[i].name;
4341 cmdlen = strlen(cmdname);
4342 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4343 isspace(buffer[cmdlen+1]))
4344 {
4345 cmd = cmdlist[i].value;
4346 break;
4347 }
4348 }
4349
4350 argptr = buffer + cmdlen + 1;
4351
4352 switch(cmd)
4353 {
4354 case CMD_UNKNOWN:
4355 fprintf(outfile, "** Unknown command: %s", buffer);
4356 break;
4357
4358 case CMD_FORBID_UTF:
4359 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4360 break;
4361
4362 case CMD_PERLTEST:
4363 restrict_for_perl_test = TRUE;
4364 break;
4365
4366 /* Set default pattern modifiers */
4367
4368 case CMD_PATTERN:
4369 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4370 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4371 def_patctl.jit = 7;
4372 break;
4373
4374 /* Set default subject modifiers */
4375
4376 case CMD_SUBJECT:
4377 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4378 break;
4379
4380 /* Check the default newline, and if not one of those listed, set up the
4381 first one to be forced. An empty list unsets. */
4382
4383 case CMD_NEWLINE_DEFAULT:
4384 local_newline_default = 0; /* Unset */
4385 first_listed_newline = 0;
4386 for (;;)
4387 {
4388 while (isspace(*argptr)) argptr++;
4389 if (*argptr == 0) break;
4390 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4391 {
4392 size_t nlen = strlen(newlines[i]);
4393 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4394 isspace(argptr[nlen]))
4395 {
4396 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
4397 if (first_listed_newline == 0) first_listed_newline = i;
4398 }
4399 }
4400 while (*argptr != 0 && !isspace(*argptr)) argptr++;
4401 }
4402 local_newline_default = first_listed_newline;
4403 break;
4404
4405 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4406 the compiled pattern (e.g. to give information) are permitted. The default
4407 pattern modifiers are ignored. */
4408
4409 case CMD_POP:
4410 case CMD_POPCOPY:
4411 if (patstacknext <= 0)
4412 {
4413 fprintf(outfile, "** Can't pop off an empty stack\n");
4414 return PR_SKIP;
4415 }
4416 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */
4417 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4418 return PR_SKIP;
4419
4420 if (cmd == CMD_POP)
4421 {
4422 SET(compiled_code, patstack[--patstacknext]);
4423 }
4424 else
4425 {
4426 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4427 }
4428
4429 if (pat_patctl.jit != 0)
4430 {
4431 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4432 }
4433 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4434 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4435 if ((pat_patctl.control & CTL_ANYINFO) != 0)
4436 {
4437 rc = show_pattern_info();
4438 if (rc != PR_OK) return rc;
4439 }
4440 break;
4441
4442 /* Save the stack of compiled patterns to a file, then empty the stack. */
4443
4444 case CMD_SAVE:
4445 if (patstacknext <= 0)
4446 {
4447 fprintf(outfile, "** No stacked patterns to save\n");
4448 return PR_OK;
4449 }
4450
4451 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
4452 if (rc != PR_OK) return rc;
4453
4454 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4455 general_context);
4456 if (rc < 0)
4457 {
4458 serial_error(rc, "Serialization");
4459 fclose(f);
4460 break;
4461 }
4462
4463 /* Write the length at the start of the file to make it straightforward to
4464 get the right memory when re-loading. This saves having to read the file size
4465 in different operating systems. To allow for different endianness (even
4466 though reloading with the opposite endianness does not work), write the
4467 length byte-by-byte. */
4468
4469 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
4470 if (fwrite(serial, 1, serial_size, f) != serial_size)
4471 {
4472 fprintf(outfile, "** Wrong return from fwrite()\n");
4473 fclose(f);
4474 return PR_ABEND;
4475 }
4476
4477 fclose(f);
4478 PCRE2_SERIALIZE_FREE(serial);
4479 while(patstacknext > 0)
4480 {
4481 SET(compiled_code, patstack[--patstacknext]);
4482 SUB1(pcre2_code_free, compiled_code);
4483 }
4484 SET(compiled_code, NULL);
4485 break;
4486
4487 /* Load a set of compiled patterns from a file onto the stack */
4488
4489 case CMD_LOAD:
4490 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
4491 if (rc != PR_OK) return rc;
4492
4493 serial_size = 0;
4494 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
4495
4496 serial = malloc(serial_size);
4497 if (serial == NULL)
4498 {
4499 fprintf(outfile, "** Failed to get memory (size %lu) for #load\n",
4500 (unsigned long int)serial_size);
4501 fclose(f);
4502 return PR_ABEND;
4503 }
4504
4505 if (fread(serial, 1, serial_size, f) != serial_size)
4506 {
4507 fprintf(outfile, "** Wrong return from fread()\n");
4508 free(serial);
4509 fclose(f);
4510 return PR_ABEND;
4511 }
4512 fclose(f);
4513
4514 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
4515 if (rc < 0) serial_error(rc, "Get number of codes"); else
4516 {
4517 if (rc + patstacknext > PATSTACKSIZE)
4518 {
4519 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
4520 rc, (rc == 1)? "" : "s");
4521 rc = PATSTACKSIZE - patstacknext;
4522 fprintf(outfile, "** Decoding %d pattern%s\n", rc,
4523 (rc == 1)? "" : "s");
4524 }
4525 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
4526 general_context);
4527 if (rc < 0) serial_error(rc, "Deserialization");
4528 else patstacknext += rc;
4529 }
4530
4531 free(serial);
4532 break;
4533 }
4534
4535 return PR_OK;
4536 }
4537
4538
4539
4540 /*************************************************
4541 * Process pattern line *
4542 *************************************************/
4543
4544 /* This function is called when the input buffer contains the start of a
4545 pattern. The first character is known to be a valid delimiter. The pattern is
4546 read, modifiers are interpreted, and a suitable local context is set up for
4547 this test. The pattern is then compiled.
4548
4549 Arguments: none
4550
4551 Returns: PR_OK continue processing next line
4552 PR_SKIP skip to a blank line
4553 PR_ABEND abort the pcre2test run
4554 */
4555
/* process_pattern(): find the closing delimiter of the pattern held in buffer,
decode the modifiers that follow it into pat_patctl, place the pattern in
pbuffer8 (copied verbatim, hex-decoded, or with repetitions expanded), and then
compile it, either through regcomp() when the posix control is set or through
PCRE2_COMPILE otherwise. Depending on the controls, the compiled pattern may be
JIT-compiled, described, and pushed on the pattern stack. The result is one of
PR_OK, PR_SKIP, or PR_ABEND. */
4556 static int
4557 process_pattern(void)
4558 {
4559 BOOL utf;
4560 uint32_t k;
4561 uint8_t *p = buffer;
4562 unsigned int delimiter = *p++;
4563 int errorcode;
4564 void *use_pat_context;
4565 PCRE2_SIZE patlen;
4566 PCRE2_SIZE valgrind_access_length;
4567 PCRE2_SIZE erroroffset;
4568
4569 /* Initialize the context and pattern/data controls for this test from the
4570 defaults. */
4571
4572 PATCTXCPY(pat_context, default_pat_context);
4573 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
4574
4575 /* Find the end of the pattern, reading more lines if necessary. */
4576
4577 for(;;)
4578 {
/* A backslash that is not the last character causes the character after it to
be stepped over, so an escaped delimiter does not end the pattern. */
4579 while (*p != 0)
4580 {
4581 if (*p == '\\' && p[1] != 0) p++;
4582 else if (*p == delimiter) break;
4583 p++;
4584 }
4585 if (*p != 0) break;
4586 if ((p = extend_inputline(infile, p, " > ")) == NULL)
4587 {
4588 fprintf(outfile, "** Unexpected EOF\n");
4589 return PR_ABEND;
4590 }
4591 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
4592 }
4593
4594 /* If the first character after the delimiter is backslash, make the pattern
4595 end with backslash. This is purely to provide a way of testing for the error
4596 message when a pattern ends with backslash. */
4597
4598 if (p[1] == '\\') *p++ = '\\';
4599
4600 /* Terminate the pattern at the delimiter, and compute the length. */
4601
4602 *p++ = 0;
/* p is now one past the terminating zero; subtracting 2 discounts the opening
delimiter at buffer[0] and that terminator. */
4603 patlen = p - buffer - 2;
4604
4605 /* Look for modifiers and options after the final delimiter. */
4606
4607 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
4608 utf = (pat_patctl.options & PCRE2_UTF) != 0;
4609
4610 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
4611 exclusive with the utf modifier. */
4612
4613 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
4614 {
4615 if (test_mode == PCRE8_MODE)
4616 {
4617 fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
4618 return PR_SKIP;
4619 }
4620 if (utf)
4621 {
4622 fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
4623 return PR_SKIP;
4624 }
4625 }
4626
4627 /* Check for mutually exclusive modifiers. At present, these are all in the
4628 first control word. */
4629
4630 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
4631 {
4632 uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
/* (c & (~c+1)) isolates the lowest set bit of c, so the comparison below is
true only when more than one bit of this exclusive group is set. */
4633 if (c != 0 && c != (c & (~c+1)))
4634 {
4635 show_controls(c, 0, "** Not allowed together:");
4636 fprintf(outfile, "\n");
4637 return PR_SKIP;
4638 }
4639 }
4640
4641 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
4642 specified. */
4643
4644 if (pat_patctl.jit == 0 &&
4645 (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
4646 pat_patctl.jit = 7;
4647
4648 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
4649 in callouts. Convert from hex if requested (literal strings in quotes may be
4650 present within the hexadecimal pairs). The result must necessarily be fewer
4651 characters so will always fit in pbuffer8. */
4652
4653 if ((pat_patctl.control & CTL_HEXPAT) != 0)
4654 {
4655 uint8_t *pp, *pt;
4656 uint32_t c, d;
4657
4658 pt = pbuffer8;
4659 for (pp = buffer + 1; *pp != 0; pp++)
4660 {
4661 if (isspace(*pp)) continue;
4662 c = *pp++;
4663
4664 /* Handle a literal substring */
4665
4666 if (c == '\'' || c == '"')
4667 {
/* pq records where the quoted text starts; it is used only to report the
position of the opening quote if no closing quote is found. */
4668 uint8_t *pq = pp;
4669 for (;; pp++)
4670 {
4671 d = *pp;
4672 if (d == 0)
4673 {
4674 fprintf(outfile, "** Missing closing quote in hex pattern: "
4675 "opening quote is at offset " PTR_SPEC ".\n", pq - buffer - 2);
4676 return PR_SKIP;
4677 }
4678 if (d == c) break;
4679 *pt++ = d;
4680 }
4681 }
4682
4683 /* Expect a hex pair */
4684
4685 else
4686 {
4687 if (!isxdigit(c))
4688 {
4689 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset "
4690 PTR_SPEC " in hex pattern: quote missing?\n", c, pp - buffer - 2);
4691 return PR_SKIP;
4692 }
4693 if (*pp == 0)
4694 {
4695 fprintf(outfile, "** Odd number of digits in hex pattern\n");
4696 return PR_SKIP;
4697 }
4698 d = *pp;
4699 if (!isxdigit(d))
4700 {
4701 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset "
4702 PTR_SPEC " in hex pattern: quote missing?\n", d, pp - buffer - 1);
4703 return PR_SKIP;
4704 }
4705 c = toupper(c);
4706 d = toupper(d);
/* Combine the two digits into one byte: c supplies the high four bits and d
the low four bits. */
4707 *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
4708 (isdigit(d)? (d - '0') : (d - 'A' + 10));
4709 }
4710 }
4711 *pt = 0;
4712 patlen = pt - pbuffer8;
4713 }
4714
4715 /* If not a hex string, process for repetition expansion if requested. */
4716
4717 else if ((pat_patctl.control & CTL_EXPAND) != 0)
4718 {
4719 uint8_t *pp, *pt;
4720
4721 pt = pbuffer8;
4722 for (pp = buffer + 1; *pp != 0; pp++)
4723 {
4724 uint8_t *pc = pp;
4725 uint32_t count = 1;
4726 size_t length = 1;
4727
4728 /* Check for replication syntax; if not found, the defaults just set will
4729 prevail and one character will be copied. */
4730
4731 if (pp[0] == '\\' && pp[1] == '[')
4732 {
4733 uint8_t *pe;
4734 for (pe = pp + 2; *pe != 0; pe++)
4735 {
4736 if (pe[0] == ']' && pe[1] == '{')
4737 {
4738 uint32_t clen = pe - pc - 2;
4739 uint32_t i = 0;
4740 unsigned long uli;
4741 char *endptr;
4742
4743 pe += 2;
4744 uli = strtoul((const char *)pe, &endptr, 10);
4745 if (U32OVERFLOW(uli))
4746 {
4747 fprintf(outfile, "** Pattern repeat count too large\n");
4748 return PR_SKIP;
4749 }
4750
4751 i = (uint32_t)uli;
4752 pe = (uint8_t *)endptr;
4753 if (*pe == '}')
4754 {
4755 if (i == 0)
4756 {
4757 fprintf(outfile, "** Zero repeat not allowed\n");
4758 return PR_SKIP;
4759 }
4760 pc += 2;
4761 count = i;
4762 length = clen;
4763 pp = pe;
4764 break;
4765 }
4766 }
4767 }
4768 }
4769
4770 /* Add to output. If the buffer is too small expand it. The function for
4771 expanding buffers always keeps buffer and pbuffer8 in step as far as their
4772 size goes. */
4773
4774 while (pt + count * length > pbuffer8 + pbuffer8_size)
4775 {
/* The positions are saved as offsets and the pointers rebuilt after the call;
presumably expand_input_buffers() may move buffer and pbuffer8 - confirm
against its definition. */
4776 size_t pc_offset = pc - buffer;
4777 size_t pp_offset = pp - buffer;
4778 size_t pt_offset = pt - pbuffer8;
4779 expand_input_buffers();
4780 pc = buffer + pc_offset;
4781 pp = buffer + pp_offset;
4782 pt = pbuffer8 + pt_offset;
4783 }
4784
4785 for (; count > 0; count--)
4786 {
4787 memcpy(pt, pc, length);
4788 pt += length;
4789 }
4790 }
4791
4792 *pt = 0;
4793 patlen = pt - pbuffer8;
4794
4795 if ((pat_patctl.control & CTL_INFO) != 0)
4796 fprintf(outfile, "Expanded: %s\n", pbuffer8);
4797 }
4798
4799 /* Neither hex nor expanded, just copy the input verbatim. */
4800
4801 else
4802 {
/* patlen + 1 takes in the terminating zero that was written over the closing
delimiter above. */
4803 strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
4804 }
4805
4806 /* Sort out character tables */
4807
4808 if (pat_patctl.locale[0] != 0)
4809 {
4810 if (pat_patctl.tables_id != 0)
4811 {
4812 fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
4813 return PR_SKIP;
4814 }
4815 if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
4816 {
4817 fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
4818 return PR_SKIP;
4819 }
/* New tables are built only when the requested locale differs from the one
remembered in locale_name; otherwise the existing locale_tables are reused. */
4820 if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
4821 {
4822 strcpy((char *)locale_name, (char *)pat_patctl.locale);
4823 if (locale_tables != NULL) free((void *)locale_tables);
4824 PCRE2_MAKETABLES(locale_tables);
4825 }
4826 use_tables = locale_tables;
4827 }
4828
4829 else switch (pat_patctl.tables_id)
4830 {
4831 case 0: use_tables = NULL; break;
4832 case 1: use_tables = tables1; break;
4833 case 2: use_tables = tables2; break;
4834 default:
4835 fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
4836 return PR_SKIP;
4837 }
4838
4839 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
4840
4841 /* Set up for the stackguard test. */
4842
4843 if (pat_patctl.stackguard_test != 0)
4844 {
4845 PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
4846 }
4847
4848 /* Handle compiling via the POSIX interface, which doesn't support the
4849 timing, showing, or debugging options, nor the ability to pass over
4850 local character tables. Neither does it have 16-bit or 32-bit support. */
4851
4852 if ((pat_patctl.control & CTL_POSIX) != 0)
4853 {
4854 #ifdef SUPPORT_PCRE2_8
4855 int rc;
4856 int cflags = 0;
4857 const char *msg = "** Ignored with POSIX interface:";
4858 #endif
4859
4860 if (test_mode != PCRE8_MODE)
4861 {
4862 fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
4863 return PR_SKIP;
4864 }
4865
4866 #ifdef SUPPORT_PCRE2_8
4867 /* Check for features that the POSIX interface does not support. */
4868
4869 if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
4870 if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
4871 if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
4872 if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
4873 if (timeit > 0) prmsg(&msg, "timing");
4874 if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
4875
4876 if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
4877 {
4878 show_compile_options(
4879 pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
4880 msg = "";
4881 }
4882 if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
4883 (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
4884 {
4885 show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
4886 pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
4887 msg = "";
4888 }
4889
4890 if (local_newline_default != 0) prmsg(&msg, "#newline_default");
4891
/* msg is set to the empty string above once something has been listed (and
presumably by prmsg() as well - confirm), so an empty msg means that a line of
output is pending and needs its newline. */
4892 if (msg[0] == 0) fprintf(outfile, "\n");
4893
4894 /* Translate PCRE2 options to POSIX options and then compile. */
4895
4896 if (utf) cflags |= REG_UTF;
4897 if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
4898 if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
4899 if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
4900 if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
4901 if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
4902 if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
4903
4904 rc = regcomp(&preg, (char *)pbuffer8, cflags);
4905
4906 /* Compiling failed */
4907
4908 if (rc != 0)
4909 {
4910 size_t bsize, usize;
4911 int psize;
4912
4913 preg.re_pcre2_code = NULL; /* In case something was left in there */
4914 preg.re_match_data = NULL;
4915
4916 bsize = (pat_patctl.regerror_buffsize != 0)?
4917 pat_patctl.regerror_buffsize : pbuffer8_size;
/* When there is room, plant a marker just past the size that is passed to
regerror(), so that a write beyond bsize can be detected below. */
4918 if (bsize + 8 < pbuffer8_size)
4919 memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
4920 usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
4921
4922 /* Inside regerror(), snprintf() is used. If the buffer is too small, some
4923 versions of snprintf() put a zero byte at the end, but others do not.
4924 Therefore, we print a maximum of one less than the size of the buffer. */
4925
4926 psize = (int)bsize - 1;
4927 fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
4928 if (usize > bsize)
4929 {
4930 fprintf(outfile, "** regerror() message truncated\n");
4931 if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
4932 fprintf(outfile, "** regerror() buffer overflow\n");
4933 }
4934 return PR_SKIP;
4935 }
4936
4937 /* Compiling succeeded. Check that the values in the preg block are sensible.
4938 It can happen that pcre2test is accidentally linked with a different POSIX
4939 library which succeeds, but of course puts different things into preg. In
4940 this situation, calling regfree() may cause a segfault (or invalid free() in
4941 valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
4942 calling of regfree() on exit. */
4943
4944 if (preg.re_pcre2_code == NULL ||
4945 ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
4946 ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
4947 preg.re_match_data == NULL ||
4948 preg.re_cflags != cflags)
4949 {
4950 fprintf(outfile,
4951 "** The regcomp() function returned zero (success), but the values set\n"
4952 "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
4953 "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
4954 "** some other POSIX regex library.\n**\n");
4955 preg.re_pcre2_code = NULL;
4956 return PR_ABEND;
4957 }
4958
4959 return PR_OK;
4960 #endif /* SUPPORT_PCRE2_8 */
4961 }
4962
4963 /* Handle compiling via the native interface. Controls that act later are
4964 ignored with "push". Replacements are locked out. */
4965
4966 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
4967 {
4968 if (pat_patctl.replacement[0] != 0)
4969 {
4970 fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
4971 return PR_OK;
4972 }
4973 if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
4974 (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
4975 {
4976 show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
4977 pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
4978 "** Ignored when compiled pattern is stacked with 'push':");
4979 fprintf(outfile, "\n");
4980 }
4981 if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
4982 (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
4983 {
4984 show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
4985 pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
4986 "** Applies only to compile when pattern is stacked with 'push':");
4987 fprintf(outfile, "\n");
4988 }
4989 }
4990
4991 /* Convert the input in non-8-bit modes. */
4992
4993 errorcode = 0;
4994
4995 #ifdef SUPPORT_PCRE2_16
4996 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
4997 #endif
4998
4999 #ifdef SUPPORT_PCRE2_32
5000 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5001 #endif
5002
5003 switch(errorcode)
5004 {
5005 case -1:
5006 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5007 "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5008 return PR_SKIP;
5009
5010 case -2:
5011 fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5012 "cannot be converted to UTF\n");
5013 return PR_SKIP;
5014
5015 case -3:
5016 fprintf(outfile, "** Failed: character value greater than 0xffff "
5017 "cannot be converted to 16-bit in non-UTF mode\n");
5018 return PR_SKIP;
5019
5020 default:
5021 break;
5022 }
5023
5024 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5025 patlen. By default we pass a zero-terminated pattern, but a length is passed if
5026 "use_length" was specified or this is a hex pattern (which might contain binary
5027 zeros). When valgrind is supported, arrange for the unused part of the buffer
5028 to be marked as no access. */
5029
5030 valgrind_access_length = patlen;
5031 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5032 {
5033 patlen = PCRE2_ZERO_TERMINATED;
5034 valgrind_access_length += 1; /* For the terminating zero */
5035 }
5036
5037 #ifdef SUPPORT_VALGRIND
5038 #ifdef SUPPORT_PCRE2_8
5039 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5040 {
5041 VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5042 pbuffer8_size - valgrind_access_length);
5043 }
5044 #endif
5045 #ifdef SUPPORT_PCRE2_16
5046 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5047 {
5048 VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5049 pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5050 }
5051 #endif
5052 #ifdef SUPPORT_PCRE2_32
5053 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5054 {
5055 VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5056 pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5057 }
5058 #endif
5059 #else /* Valgrind not supported */
5060 (void)valgrind_access_length; /* Avoid compiler warning */
5061 #endif
5062
5063 /* If #newline_default has been used and the library was not compiled with an
5064 appropriate default newline setting, local_newline_default will be non-zero. We
5065 use this if there is no explicit newline modifier. */
5066
5067 if ((pat_patctl.control2 & CTL_NL_SET) == 0 && local_newline_default != 0)
5068 {
5069 SETFLD(pat_context, newline_convention, local_newline_default);
5070 }
5071
5072 /* The null_context modifier is used to test calling pcre2_compile() with a
5073 NULL context. */
5074
5075 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5076 NULL : PTR(pat_context);
5077
5078 /* Compile many times when timing. */
5079
5080 if (timeit > 0)
5081 {
5082 int i;
5083 clock_t time_taken = 0;
5084 for (i = 0; i < timeit; i++)
5085 {
5086 clock_t start_time = clock();
5087 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5088 pat_patctl.options|forbid_utf, &errorcode, &erroroffset, use_pat_context);
5089 time_taken += clock() - start_time;
/* Each timing-run result is freed straight away; the compile whose result is
kept is done after this loop. */
5090 if (TEST(compiled_code, !=, NULL))
5091 { SUB1(pcre2_code_free, compiled_code); }
5092 }
5093 total_compile_time += time_taken;
5094 fprintf(outfile, "Compile time %.4f milliseconds\n",
5095 (((double)time_taken * 1000.0) / (double)timeit) /
5096 (double)CLOCKS_PER_SEC);
5097 }
5098
5099 /* A final compile that is used "for real". */
5100
5101 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf,
5102 &errorcode, &erroroffset, use_pat_context);
5103
5104 /* Call the JIT compiler if requested. When timing, we must free and recompile
5105 the pattern each time because that is the only way to free the JIT compiled
5106 code. We know that compilation will always succeed. */
5107
5108 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5109 {
5110 if (timeit > 0)
5111 {
5112 int i;
5113 clock_t time_taken = 0;
5114 for (i = 0; i < timeit; i++)
5115 {
5116 clock_t start_time;
5117 SUB1(pcre2_code_free, compiled_code);
5118 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5119 pat_patctl.options|forbid_utf, &errorcode, &erroroffset,
5120 use_pat_context);
/* The clock is started only after the recompile, so just the JIT compile is
timed. */
5121 start_time = clock();
5122 PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit);
5123 time_taken += clock() - start_time;
5124 }
5125 total_jit_compile_time += time_taken;
5126 fprintf(outfile, "JIT compile %.4f milliseconds\n",
5127 (((double)time_taken * 1000.0) / (double)timeit) /
5128 (double)CLOCKS_PER_SEC);
5129 }
5130 else
5131 {
5132 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5133 }
5134 }
5135
5136 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5137 and 32-bit buffers can be marked completely undefined, but we must leave the
5138 pattern in the 8-bit buffer defined because it may be read from a callout
5139 during matching. */
5140
5141 #ifdef SUPPORT_VALGRIND
5142 #ifdef SUPPORT_PCRE2_8
5143 if (test_mode == PCRE8_MODE)
5144 {
5145 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5146 pbuffer8_size - valgrind_access_length);
5147 }
5148 #endif
5149 #ifdef SUPPORT_PCRE2_16
5150 if (test_mode == PCRE16_MODE)
5151 {
5152 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5153 }
5154 #endif
5155 #ifdef SUPPORT_PCRE2_32
5156 if (test_mode == PCRE32_MODE)
5157 {
5158 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5159 }
5160 #endif
5161 #endif
5162
5163 /* Compilation failed; go back for another re, skipping to blank line
5164 if non-interactive. */
5165
5166 if (TEST(compiled_code, ==, NULL))
5167 {
5168 int len;
5169 fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5170 (int)erroroffset);
5171 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
5172 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
5173 fprintf(outfile, "\n");
5174 return PR_SKIP;
5175 }
5176
5177 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5178 locked out at compile time, but we must also check for occurrences of \P, \p,
5179 and \X, which are only supported when Unicode is supported. */
5180
5181 if (forbid_utf != 0)
5182 {
5183 if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5184 {
5185 fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5186 "#forbid_utf command\n");
5187 return PR_SKIP;
5188 }
5189 }
5190
5191 /* Remember the maximum lookbehind, for partial matching. */
5192
5193 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5194 return PR_ABEND;
5195
5196 /* If an explicit newline modifier was given, set the information flag in the
5197 pattern so that it is preserved over push/pop. */
5198
5199 if ((pat_patctl.control2 & CTL_NL_SET) != 0)
5200 {
5201 SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5202 }
5203
5204 /* Output code size and other information if requested. */
5205
5206 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5207 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
5208 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5209 {
5210 int rc = show_pattern_info();
5211 if (rc != PR_OK) return rc;
5212 }
5213
5214 /* The "push" control requests that the compiled pattern be remembered on a
5215 stack. This is mainly for testing the serialization functionality. */
5216
5217 if ((pat_patctl.control & CTL_PUSH) != 0)
5218 {
5219 if (patstacknext >= PATSTACKSIZE)
5220 {
5221 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5222 return PR_ABEND;
5223 }
/* The pointer itself is stored on the stack and compiled_code is then cleared,
so after a plain push there is no current compiled pattern. */
5224 patstack[patstacknext++] = PTR(compiled_code);
5225 SET(compiled_code, NULL);
5226 }
5227
5228 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5229 copy of the pattern, the latter with a copy of its character tables. This tests
5230 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5231
5232 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5233 {
5234 if (patstacknext >= PATSTACKSIZE)
5235 {
5236 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5237 return PR_ABEND;
5238 }
5239 if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5240 {
5241 PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5242 }
5243 else
5244 {
5245 PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
5246 compiled_code); }
5247 }
5248
5249 return PR_OK;
5250 }
5251
5252
5253
5254 /*************************************************
5255 * Check match or depth limit *
5256 *************************************************/
5257
5258 static int
5259 check_match_limit(uint8_t *pp, size_t ulen, int errnumber, const char *msg)
5260 {
5261 int capcount;
5262 uint32_t min = 0;
5263 uint32_t mid = 64;
5264 uint32_t max = UINT32_MAX;
5265
5266 PCRE2_SET_MATCH_LIMIT(dat_context, max);
5267 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
5268
5269 for (;;)
5270 {
5271 if (errnumber == PCRE2_ERROR_MATCHLIMIT)
5272 {
5273 PCRE2_SET_MATCH_LIMIT(dat_context, mid);
5274 }
5275 else
5276 {
5277 PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
5278 }
5279
5280 if ((pat_patctl.control & CTL_JITFAST) != 0)
5281 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5282 dat_datctl.options, match_data, PTR(dat_context));
5283 else
5284 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5285 dat_datctl.options, match_data, PTR(dat_context));
5286
5287 if (capcount == errnumber)
5288 {
5289 min = mid;
5290 mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
5291 }
5292 else if (capcount >= 0 ||
5293 capcount == PCRE2_ERROR_NOMATCH ||
5294 capcount == PCRE2_ERROR_PARTIAL)
5295 {
5296 if (mid == min + 1)
5297 {
5298 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
5299 break;
5300 }
5301 max = mid;
5302 mid = (min + mid)/2;
5303 }
5304 else break; /* Some other error */
5305 }
5306
5307 return capcount;
5308 }
5309
5310
5311
5312 /*************************************************
5313 * Callout function *
5314 *************************************************/
5315
5316 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
5317 we are in the match. Yield zero unless more callouts than the fail count, or
5318 the callout data is not zero. The only differences in the callout block for
5319 different code unit widths are that the pointers to the subject, the most
5320 recent MARK, and a callout argument string point to strings of the appropriate
5321 width. Casts can be used to deal with this.
5322
5323 Argument: a pointer to a callout block
5324 Return:
5325 */
5326
5327 static int
5328 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
5329 {
5330 uint32_t i, pre_start, post_start, subject_length;
5331 PCRE2_SIZE current_position;
5332 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5333 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
5334
5335 /* This FILE is used for echoing the subject. This is done only once in simple
5336 cases. */
5337
5338 FILE *f = (first_callout || callout_capture || cb->callout_string != NULL)?
5339 outfile : NULL;
5340
5341 /* For a callout with a string argument, show the string first because there
5342 isn't a tidy way to fit it in the rest of the data. */
5343
5344 if (cb->callout_string != NULL)
5345 {
5346 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
5347 fprintf(outfile, "Callout (%lu): %c",
5348 (unsigned long int)cb->callout_string_offset, delimiter);
5349 PCHARSV(cb->callout_string, 0,
5350 cb->callout_string_length, utf, outfile);
5351 for (i = 0; callout_start_delims[i] != 0; i++)
5352 if (delimiter == callout_start_delims[i])
5353 {
5354 delimiter = callout_end_delims[i];
5355 break;
5356 }
5357 fprintf(outfile, "%c", delimiter);
5358 if (!callout_capture) fprintf(outfile, "\n");
5359 }
5360
5361 /* Show captured strings if required */
5362
5363 if (callout_capture)
5364 {
5365 if (cb->callout_string == NULL)
5366 fprintf(outfile, "Callout %d:", cb->callout_number);
5367 fprintf(outfile, " last capture = %d\n", cb->capture_last);
5368 for (i = 2; i < cb->capture_top * 2; i += 2)
5369 {
5370 fprintf(outfile, "%2d: ", i/2);
5371 if (cb->offset_vector[i] == PCRE2_UNSET)
5372 fprintf(outfile, "<unset>");
5373 else
5374 {
5375 PCHARSV(cb->subject, cb->offset_vector[i],
5376 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
5377 }
5378 fprintf(outfile, "\n");
5379 }
5380 }
5381
5382 /* Re-print the subject in canonical form (with escapes for non-printing
5383 characters), the first time, or if giving full details. On subsequent calls in
5384 the same match, we use PCHARS() just to find the printed lengths of the
5385 substrings. */
5386
5387 if (f != NULL) fprintf(f, "--->");
5388
5389 /* The subject before the match start. */
5390
5391 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
5392
5393 /* If a lookbehind is involved, the current position may be earlier than the
5394 match start. If so, use the match start instead. */
5395
5396 current_position = (cb->current_position >= cb->start_match)?
5397 cb->current_position : cb->start_match;
5398
5399 /* The subject between the match start and the current position. */
5400
5401 PCHARS(post_start, cb->subject, cb->start_match,
5402 current_position - cb->start_match, utf, f);
5403
5404 /* Print from the current position to the end. */
5405
5406 PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
5407 utf, f);
5408
5409 /* Calculate the total subject printed length (no print). */
5410
5411 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
5412
5413 if (f != NULL) fprintf(f, "\n");
5414
5415 /* For automatic callouts, show the pattern offset. Otherwise, for a numerical
5416 callout whose number has not already been shown with captured strings, show the
5417 number here. A callout with a string argument has been displayed above. */
5418
5419 if (cb->callout_number == 255)
5420 {
5421 fprintf(outfile, "%+3d ", (int)cb->pattern_position);
5422 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
5423 }
5424 else
5425 {
5426 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
5427 else fprintf(outfile, "%3d ", cb->callout_number);
5428 }
5429
5430 /* Now show position indicators */
5431
5432 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
5433 fprintf(outfile, "^");
5434
5435 if (post_start > 0)
5436 {
5437 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
5438 fprintf(outfile, "^");
5439 }
5440
5441 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
5442 fprintf(outfile, " ");
5443
5444 if (cb->next_item_length != 0)
5445 fprintf(outfile, "%.*s", (int)(cb->next_item_length),
5446 pbuffer8 + cb->pattern_position);
5447
5448 fprintf(outfile, "\n");
5449 first_callout = FALSE;
5450
5451 if (cb->mark != last_callout_mark)
5452 {
5453 if (cb->mark == NULL)
5454 fprintf(outfile, "Latest Mark: <unset>\n");
5455 else
5456 {
5457 fprintf(outfile, "Latest Mark: ");
5458 PCHARSV(cb->mark, 0, -1, utf, outfile);
5459 putc('\n', outfile);
5460 }
5461 last_callout_mark = cb->mark;
5462 }
5463
5464 if (callout_data_ptr != NULL)
5465 {
5466 int callout_data = *((int32_t *)callout_data_ptr);
5467 if (callout_data != 0)
5468 {
5469 fprintf(outfile, "Callout data = %d\n", callout_data);
5470 return callout_data;
5471 }
5472 }
5473
5474 callout_count++;
5475
5476 if (cb->callout_number == dat_datctl.cerror[0] &&
5477 callout_count >= dat_datctl.cerror[1])
5478 return PCRE2_ERROR_CALLOUT;
5479
5480 if (cb->callout_number == dat_datctl.cfail[0] &&
5481 callout_count >= dat_datctl.cfail[1])
5482 return 1;
5483
5484 return 0;
5485 }
5486
5487
5488
5489 /*************************************************
5490 * Handle *MARK and copy/get tests *
5491 *************************************************/
5492
5493 /* This function is called after complete and partial matches. It runs the
5494 tests for substring extraction.
5495
5496 Arguments:
5497 utf TRUE for utf
5498 capcount return from pcre2_match()
5499
5500 Returns: nothing
5501 */
5502
5503 static void
5504 copy_and_get(BOOL utf, int capcount)
5505 {
5506 int i;
5507 uint8_t *nptr;
5508
5509 /* Test copy strings by number */
5510
5511 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
5512 {
5513 int rc;
5514 PCRE2_SIZE length, length2;
5515 uint32_t copybuffer[256];
5516 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
5517 length = sizeof(copybuffer)/code_unit_size;
5518 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
5519 if (rc < 0)
5520 {
5521 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
5522 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5523 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5524 fprintf(outfile, "\n");
5525 }
5526 else
5527 {
5528 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
5529 if (rc < 0)
5530 {
5531 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
5532 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5533 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5534 fprintf(outfile, "\n");
5535 }
5536 else if (length2 != length)
5537 {
5538 fprintf(outfile, "Mismatched substring lengths: %lu %lu\n",
5539 (unsigned long int)length, (unsigned long int)length2);
5540 }
5541 fprintf(outfile, "%2dC ", n);
5542 PCHARSV(copybuffer, 0, length, utf, outfile);
5543 fprintf(outfile, " (%lu)\n", (unsigned long)length);
5544 }
5545 }
5546
5547 /* Test copy strings by name */
5548
5549 nptr = dat_datctl.copy_names;
5550 for (;;)
5551 {
5552 int rc;
5553 int groupnumber;
5554 PCRE2_SIZE length, length2;
5555 uint32_t copybuffer[256];
5556 int namelen = strlen((const char *)nptr);
5557 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
5558 PCRE2_SIZE cnl = namelen;
5559 #endif
5560 if (namelen == 0) break;
5561
5562 #ifdef SUPPORT_PCRE2_8
5563 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
5564 #endif
5565 #ifdef SUPPORT_PCRE2_16
5566 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
5567 #endif
5568 #ifdef SUPPORT_PCRE2_32
5569 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
5570 #endif
5571
5572 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
5573 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
5574 fprintf(outfile, "Number not found for group '%s'\n", nptr);
5575
5576 length = sizeof(copybuffer)/code_unit_size;
5577 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
5578 if (rc < 0)
5579 {
5580 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
5581 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5582 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5583 fprintf(outfile, "\n");
5584 }
5585 else
5586 {
5587 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
5588 if (rc < 0)
5589 {
5590 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
5591 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5592 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5593 fprintf(outfile, "\n");
5594 }
5595 else if (length2 != length)
5596 {
5597 fprintf(outfile, "Mismatched substring lengths: %lu %lu\n",
5598 (unsigned long int)length, (unsigned long int)length2);
5599 }
5600 fprintf(outfile, " C ");
5601 PCHARSV(copybuffer, 0, length, utf, outfile);
5602 fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr);
5603 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
5604 else fprintf(outfile, " (non-unique)\n");
5605 }
5606 nptr += namelen + 1;
5607 }
5608
5609 /* Test get strings by number */
5610
5611 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
5612 {
5613 int rc;
5614 PCRE2_SIZE length;
5615 void *gotbuffer;
5616 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
5617 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
5618 if (rc < 0)
5619 {
5620 fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
5621 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5622 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5623 fprintf(outfile, "\n");
5624 }
5625 else
5626 {
5627 fprintf(outfile, "%2dG ", n);
5628 PCHARSV(gotbuffer, 0, length, utf, outfile);
5629 fprintf(outfile, " (%lu)\n", (unsigned long)length);
5630 PCRE2_SUBSTRING_FREE(gotbuffer);
5631 }
5632 }
5633
5634 /* Test get strings by name */
5635
5636 nptr = dat_datctl.get_names;
5637 for (;;)
5638 {
5639 PCRE2_SIZE length;
5640 void *gotbuffer;
5641 int rc;
5642 int groupnumber;
5643 int namelen = strlen((const char *)nptr);
5644 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
5645 PCRE2_SIZE cnl = namelen;
5646 #endif
5647 if (namelen == 0) break;
5648
5649 #ifdef SUPPORT_PCRE2_8
5650 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
5651 #endif
5652 #ifdef SUPPORT_PCRE2_16
5653 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
5654 #endif
5655 #ifdef SUPPORT_PCRE2_32
5656 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
5657 #endif
5658
5659 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
5660 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
5661 fprintf(outfile, "Number not found for group '%s'\n", nptr);
5662
5663 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
5664 if (rc < 0)
5665 {
5666 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
5667 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5668 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5669 fprintf(outfile, "\n");
5670 }
5671 else
5672 {
5673 fprintf(outfile, " G ");
5674 PCHARSV(gotbuffer, 0, length, utf, outfile);
5675 fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr);
5676 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
5677 else fprintf(outfile, " (non-unique)\n");
5678 PCRE2_SUBSTRING_FREE(gotbuffer);
5679 }
5680 nptr += namelen + 1;
5681 }
5682
5683 /* Test getting the complete list of captured strings. */
5684
5685 if ((dat_datctl.control & CTL_GETALL) != 0)
5686 {
5687 int rc;
5688 void **stringlist;
5689 PCRE2_SIZE *lengths;
5690 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
5691 if (rc < 0)
5692 {
5693 fprintf(outfile, "get substring list failed (%d): ", rc);
5694 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5695 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5696 fprintf(outfile, "\n");
5697 }
5698 else
5699 {
5700 for (i = 0; i < capcount; i++)
5701 {
5702 fprintf(outfile, "%2dL ", i);
5703 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
5704 putc('\n', outfile);
5705 }
5706 if (stringlist[i] != NULL)
5707 fprintf(outfile, "string list not terminated by NULL\n");
5708 PCRE2_SUBSTRING_LIST_FREE(stringlist);
5709 }
5710 }
5711 }
5712
5713
5714
5715 /*************************************************
5716 * Process a data line *
5717 *************************************************/
5718
5719 /* The line is in buffer; it will not be empty.
5720
5721 Arguments: none
5722
5723 Returns: PR_OK continue processing next line
5724 PR_SKIP skip to a blank line
5725 PR_ABEND abort the pcre2test run
5726 */
5727
5728 static int
5729 process_data(void)
5730 {
5731 PCRE2_SIZE len, ulen, arg_ulen;
5732 uint32_t gmatched;
5733 uint32_t c, k;
5734 uint32_t g_notempty = 0;
5735 uint8_t *p, *pp, *start_rep;
5736 size_t needlen;
5737 void *use_dat_context;
5738 BOOL utf;
5739
5740 #ifdef SUPPORT_PCRE2_8
5741 uint8_t *q8 = NULL;
5742 #endif
5743 #ifdef SUPPORT_PCRE2_16
5744 uint16_t *q16 = NULL;
5745 #endif
5746 #ifdef SUPPORT_PCRE2_32
5747 uint32_t *q32 = NULL;
5748 #endif
5749
5750 /* Copy the default context and data control blocks to the active ones. Then
5751 copy from the pattern the controls that can be set in either the pattern or the
5752 data. This allows them to be overridden in the data line. We do not do this for
5753 options because those that are common apply separately to compiling and
5754 matching. */
5755
5756 DATCTXCPY(dat_context, default_dat_context);
5757 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
5758 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
5759 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
5760 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
5761
5762 /* Initialize for scanning the data line. */
5763
5764 #ifdef SUPPORT_PCRE2_8
5765 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
5766 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
5767 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
5768 #else
5769 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5770 #endif
5771
5772 start_rep = NULL;
5773 len = strlen((const char *)buffer);
5774 while (len > 0 && isspace(buffer[len-1])) len--;
5775 buffer[len] = 0;
5776 p = buffer;
5777 while (isspace(*p)) p++;
5778
5779 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
5780 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
5781
5782 if (utf)
5783 {
5784 uint8_t *q;
5785 uint32_t cc;
5786 int n = 1;
5787 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
5788 if (n <= 0)
5789 {
5790 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
5791 "in UTF mode\n");
5792 return PR_OK;
5793 }
5794 }
5795
5796 #ifdef SUPPORT_VALGRIND
5797 /* Mark the dbuffer as addressable but undefined again. */
5798 if (dbuffer != NULL)
5799 {
5800 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
5801 }
5802 #endif
5803
5804 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
5805 the number of code units that will be needed (though the buffer may have to be
5806 extended if replication is involved). */
5807
5808 needlen = (size_t)((len+1) * code_unit_size);
5809 if (dbuffer == NULL || needlen >= dbuffer_size)
5810 {
5811 while (needlen >= dbuffer_size) dbuffer_size *= 2;
5812 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
5813 if (dbuffer == NULL)
5814 {
5815 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
5816 exit(1);
5817 }
5818 }
5819 SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */
5820
5821 /* Scan the data line, interpreting data escapes, and put the result into a
5822 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
5823 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
5824 */
5825
5826 while ((c = *p++) != 0)
5827 {
5828 int32_t i = 0;
5829 size_t replen;
5830
5831 /* ] may mark the end of a replicated sequence */
5832
5833 if (c == ']' && start_rep != NULL)
5834 {
5835 long li;
5836 char *endptr;
5837 size_t qoffset = CAST8VAR(q) - dbuffer;
5838 size_t rep_offset = start_rep - dbuffer;
5839
5840 if (*p++ != '{')
5841 {
5842 fprintf(outfile, "** Expected '{' after \\[....]\n");
5843 return PR_OK;
5844 }
5845
5846 li = strtol((const char *)p, &endptr, 10);
5847 if (S32OVERFLOW(li))
5848 {
5849 fprintf(outfile, "** Repeat count too large\n");
5850 return PR_OK;
5851 }
5852
5853 p = (uint8_t *)endptr;
5854 if (*p++ != '}')
5855 {
5856 fprintf(outfile, "** Expected '}' after \\[...]{...\n");
5857 return PR_OK;
5858 }
5859
5860 i = (int32_t)li;
5861 if (i-- == 0)
5862 {
5863 fprintf(outfile, "** Zero repeat not allowed\n");
5864 return PR_OK;
5865 }
5866
5867 replen = CAST8VAR(q) - start_rep;
5868 needlen += replen * i;
5869
5870 if (needlen >= dbuffer_size)
5871 {
5872 while (needlen >= dbuffer_size) dbuffer_size *= 2;
5873 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
5874 if (dbuffer == NULL)
5875 {
5876 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
5877 exit(1);
5878 }
5879 SETCASTPTR(q, dbuffer + qoffset);
5880 start_rep = dbuffer + rep_offset;
5881 }
5882
5883 while (i-- > 0)
5884 {
5885 memcpy(CAST8VAR(q), start_rep, replen);
5886 SETPLUS(q, replen/code_unit_size);
5887 }
5888
5889 start_rep = NULL;
5890 continue;
5891 }
5892
5893 /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
5894 set, do the fudge for setting the top bit. */
5895
5896 if (c != '\\')
5897 {
5898 uint32_t topbit = 0;
5899 if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
5900 {
5901 topbit = 0x80000000;
5902 c = *p++;
5903 }
5904 if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
5905 HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
5906 c |= topbit;
5907 }
5908
5909 /* Handle backslash escapes */
5910
5911 else switch ((c = *p++))
5912 {
5913 case '\\': break;
5914 case 'a': c = CHAR_BEL; break;
5915 case 'b': c = '\b'; break;
5916 case 'e': c = CHAR_ESC; break;
5917 case 'f': c = '\f'; break;
5918 case 'n': c = '\n'; break;
5919 case 'r': c = '\r'; break;
5920 case 't': c = '\t'; break;
5921 case 'v': c = '\v'; break;
5922
5923 case '0': case '1': case '2': case '3':
5924 case '4': case '5': case '6': case '7':
5925 c -= '0';
5926 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
5927 c = c * 8 + *p++ - '0';
5928 break;
5929
5930 case 'o':
5931 if (*p == '{')
5932 {
5933 uint8_t *pt = p;
5934 c = 0;
5935 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
5936 {
5937 if (++i == 12)
5938 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
5939 "using only the first twelve.\n");
5940 else c = c * 8 + *pt - '0';
5941 }
5942 if (*pt == '}') p = pt + 1;
5943 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
5944 }
5945 break;
5946
5947 case 'x':
5948 if (*p == '{')
5949 {
5950 uint8_t *pt = p;
5951 c = 0;
5952
5953 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
5954 when isxdigit() is a macro that refers to its argument more than
5955 once. This is banned by the C Standard, but apparently happens in at
5956 least one MacOS environment. */
5957
5958 for (pt++; isxdigit(*pt); pt++)
5959 {
5960 if (++i == 9)
5961 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
5962 "using only the first eight.\n");
5963 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
5964 }
5965 if (*pt == '}')
5966 {
5967 p = pt + 1;
5968 break;
5969 }
5970 /* Not correct form for \x{...}; fall through */
5971 }
5972
5973 /* \x without {} always defines just one byte in 8-bit mode. This
5974 allows UTF-8 characters to be constructed byte by byte, and also allows
5975 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
5976 Otherwise, pass it down as data. */
5977
5978 c = 0;
5979 while (i++ < 2 && isxdigit(*p))
5980 {
5981 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
5982 p++;
5983 }
5984 #if defined SUPPORT_PCRE2_8
5985 if (utf && (test_mode == PCRE8_MODE))
5986 {
5987 *q8++ = c;
5988 continue;
5989 }
5990 #endif
5991 break;
5992
5993 case 0: /* \ followed by EOF allows for an empty line */
5994 p--;
5995 continue;
5996
5997 case '=': /* \= terminates the data, starts modifiers */
5998 goto ENDSTRING;
5999
6000 case '[': /* \[ introduces a replicated character sequence */
6001 if (start_rep != NULL)
6002 {
6003 fprintf(outfile, "** Nested replication is not supported\n");
6004 return PR_OK;
6005 }
6006 start_rep = CAST8VAR(q);
6007 continue;
6008
6009 default:
6010 if (isalnum(c))
6011 {
6012 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6013 return PR_OK;
6014 }
6015 }
6016
6017 /* We now have a character value in c that may be greater than 255.
6018 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6019 than 127 in UTF mode must have come from \x{...} or octal constructs
6020 because values from \x.. get this far only in non-UTF mode. */
6021
6022 #ifdef SUPPORT_PCRE2_8
6023 if (test_mode == PCRE8_MODE)
6024 {
6025 if (utf)
6026 {
6027 if (c > 0x7fffffff)
6028 {
6029 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6030 "and so cannot be converted to UTF-8\n", c);
6031 return PR_OK;
6032 }
6033 q8 += ord2utf8(c, q8);
6034 }
6035 else
6036 {
6037 if (c > 0xffu)
6038 {
6039 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6040 "and UTF-8 mode is not enabled.\n", c);
6041 fprintf(outfile, "** Truncation will probably give the wrong "
6042 "result.\n");
6043 }
6044 *q8++ = c;
6045 }
6046 }
6047 #endif
6048 #ifdef SUPPORT_PCRE2_16
6049 if (test_mode == PCRE16_MODE)
6050 {
6051 if (utf)
6052 {
6053 if (c > 0x10ffffu)
6054 {
6055 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6056 "0x10ffff and so cannot be converted to UTF-16\n", c);
6057 return PR_OK;
6058 }
6059 else if (c >= 0x10000u)
6060 {
6061 c-= 0x10000u;
6062 *q16++ = 0xD800 | (c >> 10);
6063 *q16++ = 0xDC00 | (c & 0x3ff);
6064 }
6065 else
6066 *q16++ = c;
6067 }
6068 else
6069 {
6070 if (c > 0xffffu)
6071 {
6072 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6073 "and UTF-16 mode is not enabled.\n", c);
6074 fprintf(outfile, "** Truncation will probably give the wrong "
6075 "result.\n");
6076 }
6077
6078 *q16++ = c;
6079 }
6080 }
6081 #endif
6082 #ifdef SUPPORT_PCRE2_32
6083 if (test_mode == PCRE32_MODE)
6084 {
6085 *q32++ = c;
6086 }
6087 #endif
6088 }
6089
6090 ENDSTRING:
6091 SET(*q, 0);
6092 len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
6093 ulen = len/code_unit_size; /* Length in code units */
6094 arg_ulen = ulen; /* Value to use in match arg */
6095
6096 /* If the string was terminated by \= we must now interpret modifiers. */
6097
6098 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
6099 return PR_OK;
6100
6101 /* Check for mutually exclusive modifiers. At present, these are all in the
6102 first control word. */
6103
6104 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
6105 {
6106 c = dat_datctl.control & exclusive_dat_controls[k];
6107 if (c != 0 && c != (c & (~c+1)))
6108 {
6109 show_controls(c, 0, "** Not allowed together:");
6110 fprintf(outfile, "\n");
6111 return PR_OK;
6112 }
6113 }
6114
6115 if (pat_patctl.replacement[0] != 0 &&
6116 (dat_datctl.control & CTL_NULLCONTEXT) != 0)
6117 {
6118 fprintf(outfile, "** Replacement text is not supported with null_context.\n");
6119 return PR_OK;
6120 }
6121
6122 /* We now have the subject in dbuffer, with len containing the byte length, and
6123 ulen containing the code unit length, with a copy in arg_ulen for use in match
6124 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
6125 zero_terminate modifier is present).
6126
6127 Move the data to the end of the buffer so that a read over the end can be
6128 caught by valgrind or other means. If we have explicit valgrind support, mark
6129 the unused start of the buffer unaddressable. If we are using the POSIX
6130 interface, or testing zero-termination, we must include the terminating zero in
6131 the usable data. */
6132
6133 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
6134 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
6135 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
6136 #ifdef SUPPORT_VALGRIND
6137 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
6138 #endif
6139
6140 /* Now pp points to the subject string. POSIX matching is only possible in
6141 8-bit mode, and it does not support timing or other fancy features. Some were
6142 checked at compile time, but we need to check the match-time settings here. */
6143
6144 #ifdef SUPPORT_PCRE2_8
6145 if ((pat_patctl.control & CTL_POSIX) != 0)
6146 {
6147 int rc;
6148 int eflags = 0;
6149 regmatch_t *pmatch = NULL;
6150 const char *msg = "** Ignored with POSIX interface:";
6151
6152 if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
6153 prmsg(&msg, "callout_error");
6154 if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
6155 prmsg(&msg, "callout_fail");
6156 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
6157 prmsg(&msg, "copy");
6158 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
6159 prmsg(&msg, "get");
6160 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
6161 if (dat_datctl.offset != 0) prmsg(&msg, "offset");
6162
6163 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
6164 {
6165 fprintf(outfile, "%s", msg);
6166 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
6167 msg = "";
6168 }
6169 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
6170 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
6171 {
6172 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
6173 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
6174 msg = "";
6175 }
6176
6177 if (msg[0] == 0) fprintf(outfile, "\n");
6178
6179 if (dat_datctl.oveccount > 0)
6180 {
6181 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
6182 if (pmatch == NULL)
6183 {
6184 fprintf(outfile, "** Failed to get memory for recording matching "
6185 "information (size set = %du)\n", dat_datctl.oveccount);
6186 return PR_OK;
6187 }
6188 }
6189
6190 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
6191 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
6192 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
6193
6194 rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
6195 if (rc != 0)
6196 {
6197 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
6198 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
6199 }
6200 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
6201 fprintf(outfile, "Matched with REG_NOSUB\n");
6202 else if (dat_datctl.oveccount == 0)
6203 fprintf(outfile, "Matched without capture\n");
6204 else
6205 {
6206 size_t i;
6207 for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
6208 {
6209 if (pmatch[i].rm_so >= 0)
6210 {
6211 PCRE2_SIZE start = pmatch[i].rm_so;
6212 PCRE2_SIZE end = pmatch[i].rm_eo;
6213 if (start > end)
6214 {
6215 start = pmatch[i].rm_eo;
6216 end = pmatch[i].rm_so;
6217 fprintf(outfile, "Start of matched string is beyond its end - "
6218 "displaying from end to start.\n");
6219 }
6220 fprintf(outfile, "%2d: ", (int)i);
6221 PCHARSV(pp, start, end - start, utf, outfile);
6222 fprintf(outfile, "\n");
6223
6224 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
6225 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
6226 {
6227 fprintf(outfile, "%2d+ ", (int)i);
6228 /* Note: don't use the start/end variables here because we want to
6229 show the text from what is reported as the end. */
6230 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
6231 fprintf(outfile, "\n"); }
6232 }
6233 }
6234 }
6235 free(pmatch);
6236 return PR_OK;
6237 }
6238 #endif /* SUPPORT_PCRE2_8 */
6239
6240 /* Handle matching via the native interface. Check for consistency of
6241 modifiers. */
6242
6243 if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS))
6244 {
6245 fprintf(outfile, "** Finding match limits is not relevant for DFA matching: ignored\n");
6246 dat_datctl.control &= ~CTL_FINDLIMITS;
6247 }
6248
6249 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
6250 matching, even if the JIT compiler was used. */
6251
6252 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
6253 FLD(compiled_code, executable_jit) != NULL)
6254 {
6255 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
6256 dat_datctl.control &= ~CTL_ALLUSEDTEXT;
6257 }
6258
6259 /* Handle passing the subject as zero-terminated. */
6260
6261 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
6262 arg_ulen = PCRE2_ZERO_TERMINATED;
6263
6264 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
6265 NULL context. */
6266
6267 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
6268 NULL : PTR(dat_context);
6269
6270 /* Enable display of malloc/free if wanted. */
6271
6272 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
6273
6274 /* Create and assign a JIT stack if requested. */
6275
6276 if (dat_datctl.jitstack != 0)
6277 {
6278 if (dat_datctl.jitstack != jit_stack_size)
6279 {
6280 PCRE2_JIT_STACK_FREE(jit_stack);
6281 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
6282 jit_stack_size = dat_datctl.jitstack;
6283 }
6284 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
6285 }
6286
6287 /* Or de-assign */
6288
6289 else if (jit_stack != NULL)
6290 {
6291 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
6292 PCRE2_JIT_STACK_FREE(jit_stack);
6293 jit_stack = NULL;
6294 jit_stack_size = 0;
6295 }
6296
6297 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
6298 if we want to verify that JIT was actually used. */
6299
6300 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
6301 {
6302 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
6303 }
6304
6305 /* Adjust match_data according to size of offsets required. A size of zero
6306 causes a new match data block to be obtained that exactly fits the pattern. */
6307
6308 if (dat_datctl.oveccount == 0)
6309 {
6310 PCRE2_MATCH_DATA_FREE(match_data);
6311 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
6312 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
6313 }
6314 else if (dat_datctl.oveccount <= max_oveccount)
6315 {
6316 SETFLD(match_data, oveccount, dat_datctl.oveccount);
6317 }
6318 else
6319 {
6320 max_oveccount = dat_datctl.oveccount;
6321 PCRE2_MATCH_DATA_FREE(match_data);
6322 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
6323 }
6324
6325 if (CASTVAR(void *, match_data) == NULL)
6326 {
6327 fprintf(outfile, "** Failed to get memory for recording matching "
6328 "information (size requested: %d)\n", dat_datctl.oveccount);
6329 max_oveccount = 0;
6330 return PR_OK;
6331 }
6332
6333 /* Replacement processing is ignored for DFA matching. */
6334
6335 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
6336 {
6337 fprintf(outfile, "** Ignored for DFA matching: replace\n");
6338 dat_datctl.replacement[0] = 0;
6339 }
6340
6341 /* If a replacement string is provided, call pcre2_substitute() instead of one
6342 of the matching functions. First we have to convert the replacement string to
6343 the appropriate width. */
6344
6345 if (dat_datctl.replacement[0] != 0)
6346 {
6347 int rc;
6348 uint8_t *pr;
6349 uint8_t rbuffer[REPLACE_BUFFSIZE];
6350 uint8_t nbuffer[REPLACE_BUFFSIZE];
6351 uint32_t xoptions;
6352 PCRE2_SIZE rlen, nsize, erroroffset;
6353 BOOL badutf = FALSE;
6354
6355 #ifdef SUPPORT_PCRE2_8
6356 uint8_t *r8 = NULL;
6357 #endif
6358 #ifdef SUPPORT_PCRE2_16
6359 uint16_t *r16 = NULL;
6360 #endif
6361 #ifdef SUPPORT_PCRE2_32
6362 uint32_t *r32 = NULL;
6363 #endif
6364
6365 if (timeitm)
6366 fprintf(outfile, "** Timing is not supported with replace: ignored\n");
6367
6368 xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
6369 PCRE2_SUBSTITUTE_GLOBAL) |
6370 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
6371 PCRE2_SUBSTITUTE_EXTENDED) |
6372 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
6373 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
6374 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
6375 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
6376 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
6377 PCRE2_SUBSTITUTE_UNSET_EMPTY);
6378
6379 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
6380 pr = dat_datctl.replacement;
6381
6382 /* If the replacement starts with '[<number>]' we interpret that as length
6383 value for the replacement buffer. */
6384
6385 nsize = REPLACE_BUFFSIZE/code_unit_size;
6386 if (*pr == '[')
6387 {
6388 PCRE2_SIZE n = 0;
6389 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
6390 if (*pr++ != ']')
6391 {
6392 fprintf(outfile, "Bad buffer size in replacement string\n");
6393 return PR_OK;
6394 }
6395 if (n > nsize)
6396 {
6397 fprintf(outfile, "Replacement buffer setting (%lu) is too large "
6398 "(max %lu)\n", (unsigned long int)n, (unsigned long int)nsize);
6399 return PR_OK;
6400 }
6401 nsize = n;
6402 }
6403
6404 /* Now copy the replacement string to a buffer of the appropriate width. No
6405 escape processing is done for replacements. In UTF mode, check for an invalid
6406 UTF-8 input string, and if it is invalid, just copy its code units without
6407 UTF interpretation. This provides a means of checking that an invalid string
6408 is detected. Otherwise, UTF-8 can be used to include wide characters in a
6409 replacement. */
6410
6411 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
6412
6413 /* Not UTF or invalid UTF-8: just copy the code units. */
6414
6415 if (!utf || badutf)
6416 {
6417 while ((c = *pr++) != 0)
6418 {
6419 #ifdef SUPPORT_PCRE2_8
6420 if (test_mode == PCRE8_MODE) *r8++ = c;
6421 #endif
6422 #ifdef SUPPORT_PCRE2_16
6423 if (test_mode == PCRE16_MODE) *r16++ = c;
6424 #endif
6425 #ifdef SUPPORT_PCRE2_32
6426 if (test_mode == PCRE32_MODE) *r32++ = c;
6427 #endif
6428 }
6429 }
6430
6431 /* Valid UTF-8 replacement string */
6432
6433 else while ((c = *pr++) != 0)
6434 {
6435 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
6436
6437 #ifdef SUPPORT_PCRE2_8
6438 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
6439 #endif
6440
6441 #ifdef SUPPORT_PCRE2_16
6442 if (test_mode == PCRE16_MODE)
6443 {
6444 if (c >= 0x10000u)
6445 {
6446 c-= 0x10000u;
6447 *r16++ = 0xD800 | (c >> 10);
6448 *r16++ = 0xDC00 | (c & 0x3ff);
6449 }
6450 else *r16++ = c;
6451 }
6452 #endif
6453
6454 #ifdef SUPPORT_PCRE2_32
6455 if (test_mode == PCRE32_MODE) *r32++ = c;
6456 #endif
6457 }
6458
6459 SET(*r, 0);
6460 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
6461 rlen = PCRE2_ZERO_TERMINATED;
6462 else
6463 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
6464 PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
6465 dat_datctl.options|xoptions, match_data, dat_context,
6466 rbuffer, rlen, nbuffer, &nsize);
6467
6468 if (rc < 0)
6469 {
6470 PCRE2_SIZE msize;
6471 fprintf(outfile, "Failed: error %d", rc);
6472 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
6473 fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
6474 fprintf(outfile, ": ");
6475 PCRE2_GET_ERROR_MESSAGE(msize, rc, pbuffer);
6476 PCHARSV(CASTVAR(void *, pbuffer), 0, msize, FALSE, outfile);
6477 if (rc == PCRE2_ERROR_NOMEMORY &&
6478 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
6479 fprintf(outfile, ": %ld code units are needed", (long int)nsize);
6480 }
6481 else
6482 {
6483 fprintf(outfile, "%2d: ", rc);
6484 PCHARSV(nbuffer, 0, nsize, utf, outfile);
6485 }
6486
6487 fprintf(outfile, "\n");
6488 } /* End of substitution handling */
6489
6490 /* When a replacement string is not provided, run a loop for global matching
6491 with one of the basic matching functions. */
6492
6493 else for (gmatched = 0;; gmatched++)
6494 {
6495 PCRE2_SIZE j;
6496 int capcount;
6497 PCRE2_SIZE *ovector;
6498 PCRE2_SIZE ovecsave[2];
6499
6500 ovector = FLD(match_data, ovector);
6501
6502 /* After the first time round a global loop, for a normal global (/g)
6503 iteration, save the current ovector[0,1] so that we can check that they do
6504 change each time. Otherwise a matching bug that returns the same string
6505 causes an infinite loop. It has happened! */
6506
6507 if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0)
6508 {
6509 ovecsave[0] = ovector[0];
6510 ovecsave[1] = ovector[1];
6511 }
6512
6513 /* For altglobal (or first time round the loop), set an "unset" value. */
6514
6515 else ovecsave[0] = ovecsave[1] = PCRE2_UNSET;
6516
6517 /* Fill the ovector with junk to detect elements that do not get set
6518 when they should be. */
6519
6520 for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
6521
6522 /* When matching is via pcre2_match(), we will detect the use of JIT via the
6523 stack callback function. */
6524
6525 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
6526
6527 /* Do timing if required. */
6528
6529 if (timeitm > 0)
6530 {
6531 int i;
6532 clock_t start_time, time_taken;
6533
6534 if ((dat_datctl.control & CTL_DFA) != 0)
6535 {
6536 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
6537 {
6538 fprintf(outfile, "Timing DFA restarts is not supported\n");
6539 return PR_OK;
6540 }
6541 if (dfa_workspace == NULL)
6542 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6543 start_time = clock();
6544 for (i = 0; i < timeitm; i++)
6545 {
6546 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
6547 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6548 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
6549 }
6550 }
6551
6552 else if ((pat_patctl.control & CTL_JITFAST) != 0)
6553 {
6554 start_time = clock();
6555 for (i = 0; i < timeitm; i++)
6556 {
6557 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
6558 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6559 use_dat_context);
6560 }
6561 }
6562
6563 else
6564 {
6565 start_time = clock();
6566 for (i = 0; i < timeitm; i++)
6567 {
6568 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
6569 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6570 use_dat_context);
6571 }
6572 }
6573 total_match_time += (time_taken = clock() - start_time);
6574 fprintf(outfile, "Match time %.4f milliseconds\n",
6575 (((double)time_taken * 1000.0) / (double)timeitm) /
6576 (double)CLOCKS_PER_SEC);
6577 }
6578
6579 /* Find the match and depth limits if requested. The depth limit
6580 is not relevant for JIT. */
6581
6582 if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
6583 {
6584 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT, "match");
6585 if (FLD(compiled_code, executable_jit) == NULL)
6586 (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
6587 "depth");
6588 }
6589
6590 /* Otherwise just run a single match, setting up a callout if required (the
6591 default). There is a copy of the pattern in pbuffer8 for use by callouts. */
6592
6593 else
6594 {
6595 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
6596 {
6597 PCRE2_SET_CALLOUT(dat_context, callout_function,
6598 (void *)(&dat_datctl.callout_data));
6599 first_callout = TRUE;
6600 last_callout_mark = NULL;
6601 callout_count = 0;
6602 }
6603 else
6604 {
6605 PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
6606 }
6607
6608 /* Run a single DFA or