/[pcre2]/code/trunk/src/pcre2test.c
ViewVC logotype

Contents of /code/trunk/src/pcre2test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 697 - (show annotations)
Tue Mar 21 18:36:13 2017 UTC (2 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 258747 byte(s)
Error occurred while calculating annotation data.
Previous patch was not quite complete.
1 /*************************************************
2 * PCRE2 testing program *
3 *************************************************/
4
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11
12 Written by Philip Hazel
13 Original code Copyright (c) 1997-2012 University of Cambridge
14 Rewritten code Copyright (c) 2016-2017 University of Cambridge
15
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44
45
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80
81 /* Both libreadline and libedit are optionally supported. The user-supplied
82 original patch uses readline/readline.h for libedit, but in at least one system
83 it is installed as editline/readline.h, so the configuration code now looks for
84 that first, falling back to readline/readline.h. */
85
86 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
87 #if defined(SUPPORT_LIBREADLINE)
88 #include <readline/readline.h>
89 #include <readline/history.h>
90 #else
91 #if defined(HAVE_EDITLINE_READLINE_H)
92 #include <editline/readline.h>
93 #else
94 #include <readline/readline.h>
95 #endif
96 #endif
97 #endif
98
99 /* Put the test for interactive input into a macro so that it can be changed if
100 required for different environments. */
101
102 #define INTERACTIVE(f) isatty(fileno(f))
103
104
105 /* ---------------------- System-specific definitions ---------------------- */
106
107 /* A number of things vary for Windows builds. Originally, pcretest opened its
108 input and output without "b"; then I was told that "b" was needed in some
109 environments, so it was added for release 5.0 to both the input and output. (It
110 makes no difference on Unix-like systems.) Later I was told that it is wrong
111 for the input on Windows. I've now abstracted the modes into macros that are
112 set here, to make it easier to fiddle with them, and removed "b" from the input
113 mode under Windows. The BINARY versions are used when saving/restoring compiled
114 patterns. */
115
116 #if defined(_WIN32) || defined(WIN32)
117 #include <io.h> /* For _setmode() */
118 #include <fcntl.h> /* For _O_BINARY */
119 #define INPUT_MODE "r"
120 #define OUTPUT_MODE "wb"
121 #define BINARY_INPUT_MODE "rb"
122 #define BINARY_OUTPUT_MODE "wb"
123
124 #ifndef isatty
125 #define isatty _isatty /* This is what Windows calls them, I'm told, */
126 #endif /* though in some environments they seem to */
127 /* be already defined, hence the #ifndefs. */
128 #ifndef fileno
129 #define fileno _fileno
130 #endif
131
132 /* A user sent this fix for Borland Builder 5 under Windows. */
133
134 #ifdef __BORLANDC__
135 #define _setmode(handle, mode) setmode(handle, mode)
136 #endif
137
138 /* Not Windows */
139
140 #else
141 #include <sys/time.h> /* These two includes are needed */
142 #include <sys/resource.h> /* for setrlimit(). */
143 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
144 #define INPUT_MODE "r"
145 #define OUTPUT_MODE "w"
146 #define BINARY_INPUT_MODE "rb"
147 #define BINARY_OUTPUT_MODE "wb"
148 #else
149 #define INPUT_MODE "rb"
150 #define OUTPUT_MODE "wb"
151 #define BINARY_INPUT_MODE "rb"
152 #define BINARY_OUTPUT_MODE "wb"
153 #endif
154 #endif
155
156 #ifdef __VMS
157 #include <ssdef.h>
158 void vms_setsymbol( char *, char *, int );
159 #endif
160
161 /* VC doesn't support "%td". */
162 #ifdef _MSC_VER
163 #define PTR_SPEC "%lu"
164 #else
165 #define PTR_SPEC "%td"
166 #endif
167
168 /* ------------------End of system-specific definitions -------------------- */
169
170 /* Glueing macros that are used in several places below. */
171
172 #define glue(a,b) a##b
173 #define G(a,b) glue(a,b)
174
175 /* Miscellaneous parameters and manifests */
176
/* Supply CLOCKS_PER_SEC when the C library does not: use CLK_TCK if that is
available, otherwise assume 100 ticks per second. */

#if !defined(CLOCKS_PER_SEC)
#  if defined(CLK_TCK)
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif
184
185 #define CFORE_UNSET UINT32_MAX /* Unset value for cfail/cerror fields */
186 #define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
187 #define DEFAULT_OVECCOUNT 15 /* Default ovector count */
188 #define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
189 #define LOCALESIZE 32 /* Size of locale name */
190 #define LOOPREPEAT 500000 /* Default loop count for timing */
191 #define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */
192 #define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */
193 #define VERSION_SIZE 64 /* Size of buffer for the version strings */
194
195 /* Make sure the buffer into which replacement strings are copied is big enough
196 to hold them as 32-bit code units. */
197
198 #define REPLACE_BUFFSIZE 1024 /* This is a byte value */
199
200 /* Execution modes */
201
202 #define PCRE8_MODE 8
203 #define PCRE16_MODE 16
204 #define PCRE32_MODE 32
205
206 /* Processing returns */
207
enum {
  PR_OK,      /* = 0 */
  PR_SKIP,    /* = 1 */
  PR_ABEND    /* = 2 */
};
209
210 /* The macro PRINTABLE determines whether to print an output character as-is or
211 as a hex value when showing compiled patterns. We use it in cases when the
212 locale has not been explicitly changed, so as to get consistent output from
213 systems that differ in their output from isprint() even in the "C" locale. */
214
/* PRINTABLE(c) is a fixed, locale-independent printability test: values 64 to
254 when EBCDIC is defined, otherwise values 32 to 126. The argument is
evaluated twice. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) uses isprint() when the use_tables pointer that is in scope at
the point of use is not NULL and c is less than 256; otherwise it falls back to
PRINTABLE(c). The argument is parenthesized in the range test so that an
expression argument (for example a|b) is compared as a whole rather than being
split by operator precedence. The argument may be evaluated more than once. */

#define PRINTOK(c) \
  ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
222
223 /* We have to include some of the library source files because we need
224 to use some of the macros, internal structure definitions, and other internal
225 values - pcre2test has "inside information" compared to an application program
226 that strictly follows the PCRE2 API.
227
228 Before including pcre2_internal.h we define PRIV so that it does not get
229 defined therein. This ensures that PRIV names in the included files do not
230 clash with those in the libraries. Also, although pcre2_internal.h does itself
231 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
232 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
233 for building the library. */
234
235 #define PRIV(name) name
236 #define PCRE2_CODE_UNIT_WIDTH 0
237 #include "pcre2.h"
238 #include "pcre2posix.h"
239 #include "pcre2_internal.h"
240
241 /* We need access to some of the data tables that PCRE2 uses. Defining
242 PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
243 of PRIV avoids name clashes. */
244
245 #define PCRE2_PCRE2TEST
246 #include "pcre2_tables.c"
247 #include "pcre2_ucd.c"
248
249 /* 32-bit integer values in the input are read by strtoul() or strtol(). The
250 check needed for overflow depends on whether long ints are in fact longer than
251 ints. They are defined not to be shorter. */
252
/* U32OVERFLOW(x) tests an unsigned long value for not fitting in 32 bits. When
unsigned long is wider than 32 bits a simple range comparison is used; when it
is exactly 32 bits only equality with the maximum value can be tested. The
argument is parenthesized so that any expression may be passed. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

/* S32OVERFLOW(x) is the signed counterpart for long values, with the same
two cases. The argument is evaluated twice. */

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
264
265 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
266 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
267 defined. We can now include it for each supported code unit width. Because
268 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
269 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
270 while including these files, and then restore it to a no-op. Because LINK_SIZE
271 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
272 these inclusions should not be changed. */
273
274 #undef PCRE2_SUFFIX
275 #undef PCRE2_CODE_UNIT_WIDTH
276
277 #ifdef SUPPORT_PCRE2_8
278 #define PCRE2_CODE_UNIT_WIDTH 8
279 #define PCRE2_SUFFIX(a) G(a,8)
280 #include "pcre2_intmodedep.h"
281 #include "pcre2_printint.c"
282 #undef PCRE2_CODE_UNIT_WIDTH
283 #undef PCRE2_SUFFIX
284 #endif /* SUPPORT_PCRE2_8 */
285
286 #ifdef SUPPORT_PCRE2_16
287 #define PCRE2_CODE_UNIT_WIDTH 16
288 #define PCRE2_SUFFIX(a) G(a,16)
289 #include "pcre2_intmodedep.h"
290 #include "pcre2_printint.c"
291 #undef PCRE2_CODE_UNIT_WIDTH
292 #undef PCRE2_SUFFIX
293 #endif /* SUPPORT_PCRE2_16 */
294
295 #ifdef SUPPORT_PCRE2_32
296 #define PCRE2_CODE_UNIT_WIDTH 32
297 #define PCRE2_SUFFIX(a) G(a,32)
298 #include "pcre2_intmodedep.h"
299 #include "pcre2_printint.c"
300 #undef PCRE2_CODE_UNIT_WIDTH
301 #undef PCRE2_SUFFIX
302 #endif /* SUPPORT_PCRE2_32 */
303
304 #define PCRE2_SUFFIX(a) a
305
306 /* We need to be able to check input text for UTF-8 validity, whatever code
307 widths are actually available, because the input to pcre2test is always in
308 8-bit code units. So we include the UTF validity checking function for 8-bit
309 code units. */
310
311 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
312
313 #define PCRE2_CODE_UNIT_WIDTH 8
314 #undef PCRE2_SPTR
315 #define PCRE2_SPTR PCRE2_SPTR8
316 #include "pcre2_valid_utf.c"
317 #undef PCRE2_CODE_UNIT_WIDTH
318 #undef PCRE2_SPTR
319
320 /* If we have 8-bit support, default to it; if there is also 16-or 32-bit
321 support, it can be selected by a command-line option. If there is no 8-bit
322 support, there must be 16- or 32-bit support, so default to one of them. The
323 config function, JIT stack, contexts, and version string are the same in all
324 modes, so use the form of the first that is available. */
325
/* Select the default test mode and the mode-independent names (version string
code unit type, config function, JIT stack type, and the real context
structures) from the first library that is enabled, trying 8-bit, then 16-bit,
then 32-bit. Each name is defined exactly once per branch. */

#if defined SUPPORT_PCRE2_8
#define DEFAULT_TEST_MODE PCRE8_MODE
#define VERSION_TYPE PCRE2_UCHAR8
#define PCRE2_CONFIG pcre2_config_8
#define PCRE2_JIT_STACK pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8

#elif defined SUPPORT_PCRE2_16
#define DEFAULT_TEST_MODE PCRE16_MODE
#define VERSION_TYPE PCRE2_UCHAR16
#define PCRE2_CONFIG pcre2_config_16
#define PCRE2_JIT_STACK pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16

#elif defined SUPPORT_PCRE2_32
#define DEFAULT_TEST_MODE PCRE32_MODE
#define VERSION_TYPE PCRE2_UCHAR32
#define PCRE2_CONFIG pcre2_config_32
#define PCRE2_JIT_STACK pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
#endif
354
355 /* ------------- Structure and table for handling #-commands ------------- */
356
/* One entry in the table of #-commands: a command name and the CMD_xxx code
that it maps to. */

typedef struct cmdstruct {
  const char *name;
  int value;
} cmdstruct;

/* Codes for the #-commands. CMD_UNKNOWN is the only code without an entry in
cmdlist. */

enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN,
       CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN };

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }};

/* Number of entries in cmdlist. The expansion is parenthesized so that it
behaves as a single operand in any surrounding expression. */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdstruct))
377
378 /* ------------- Structures and tables for handling modifiers -------------- */
379
380 /* Table of names for newline types. Must be kept in step with the definitions
381 of PCRE2_NEWLINE_xx in pcre2.h. */
382
static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF"
};
385
386 /* Modifier types and applicability */
387
enum {
  /* Where a modifier may be used */
  MOD_CTC,    /* Compile context */
  MOD_CTM,    /* Match context */
  MOD_PAT,    /* Pattern */
  MOD_PATP,   /* Pattern; also allowed for a Perl test */
  MOD_DAT,    /* Data line */
  MOD_PD,     /* Pattern or data line */
  MOD_PDP,    /* Like MOD_PD; also allowed for a Perl test */
  MOD_PND,    /* Like MOD_PD, except not for a default pattern */
  MOD_PNDP,   /* Like MOD_PND; also allowed for a Perl test */

  /* What kind of value a modifier carries */
  MOD_CTL,    /* Control bit */
  MOD_BSR,    /* BSR value */
  MOD_IN2,    /* One or two unsigned integers */
  MOD_INS,    /* Signed integer */
  MOD_INT,    /* Unsigned integer */
  MOD_IND,    /* Unsigned integer; a default applies if no value is given */
  MOD_NL,     /* Newline value */
  MOD_NN,     /* Number or name; may occur more than once */
  MOD_OPT,    /* Option bit */
  MOD_SIZ,    /* PCRE2_SIZE value */
  MOD_STR     /* String */
};
408
409 /* Control bits. Some apply to compiling, some to matching, but some can be set
410 either on a pattern or a data line, so they must all be distinct. There are now
411 so many of them that they are split into two fields. */
412
/* First control word: one bit per control, written as a shift count so that
the bit positions can be read off directly. */

#define CTL_AFTERTEXT        (1u << 0)
#define CTL_ALLAFTERTEXT     (1u << 1)
#define CTL_ALLCAPTURES      (1u << 2)
#define CTL_ALLUSEDTEXT      (1u << 3)
#define CTL_ALTGLOBAL        (1u << 4)
#define CTL_BINCODE          (1u << 5)
#define CTL_CALLOUT_CAPTURE  (1u << 6)
#define CTL_CALLOUT_INFO     (1u << 7)
#define CTL_CALLOUT_NONE     (1u << 8)
#define CTL_DFA              (1u << 9)
#define CTL_EXPAND           (1u << 10)
#define CTL_FINDLIMITS       (1u << 11)
#define CTL_FRAMESIZE        (1u << 12)
#define CTL_FULLBINCODE      (1u << 13)
#define CTL_GETALL           (1u << 14)
#define CTL_GLOBAL           (1u << 15)
#define CTL_HEXPAT           (1u << 16)  /* Keep in the same word as USE_LENGTH */
#define CTL_INFO             (1u << 17)
#define CTL_JITFAST          (1u << 18)
#define CTL_JITVERIFY        (1u << 19)
#define CTL_MARK             (1u << 20)
#define CTL_MEMORY           (1u << 21)
#define CTL_NULLCONTEXT      (1u << 22)
#define CTL_POSIX            (1u << 23)
#define CTL_POSIX_NOSUB      (1u << 24)
#define CTL_PUSH             (1u << 25)  /* The three PUSH controls must */
#define CTL_PUSHCOPY         (1u << 26)  /* all live in the same control */
#define CTL_PUSHTABLESCOPY   (1u << 27)  /* word. */
#define CTL_STARTCHAR        (1u << 28)
#define CTL_USE_LENGTH       (1u << 29)  /* Keep in the same word as HEXPAT */
#define CTL_UTF8_INPUT       (1u << 30)
#define CTL_ZERO_TERMINATE   (1u << 31)

/* Second control word */

#define CTL2_SUBSTITUTE_EXTENDED         (1u << 0)
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  (1u << 1)
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET    (1u << 2)
#define CTL2_SUBSTITUTE_UNSET_EMPTY      (1u << 3)

#define CTL_NL_SET                       (1u << 30)  /* Informational */
#define CTL_BSR_SET                      (1u << 31)  /* Informational */

/* Combinations of first-word controls */

#define CTL_DEBUG    (CTL_FULLBINCODE|CTL_INFO)    /* For setting */
#define CTL_ANYINFO  (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO)
#define CTL_ANYGLOB  (CTL_ALTGLOBAL|CTL_GLOBAL)

/* Every control that is accepted both on a pattern and on a data line, for
each of the two control words. */

#define CTL_ALLPD \
  (CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_ALLCAPTURES|CTL_ALLUSEDTEXT| \
   CTL_ALTGLOBAL|CTL_GLOBAL|CTL_MARK|CTL_MEMORY|CTL_STARTCHAR| \
   CTL_UTF8_INPUT)

#define CTL2_ALLPD \
  (CTL2_SUBSTITUTE_EXTENDED|CTL2_SUBSTITUTE_OVERFLOW_LENGTH| \
   CTL2_SUBSTITUTE_UNKNOWN_UNSET|CTL2_SUBSTITUTE_UNSET_EMPTY)
480
481 /* Structures for holding modifier information for patterns and subject strings
482 (data). Fields containing modifiers that can be set either for a pattern or a
483 subject must be at the start and in the same order in both cases so that the
484 same offset in the big table below works for both. */
485
486 typedef struct patctl { /* Structure for pattern modifiers. */
487 uint32_t options; /* Must be in same position as datctl */
488 uint32_t control; /* Must be in same position as datctl */
489 uint32_t control2; /* Must be in same position as datctl */
490 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
491 uint32_t jit;
492 uint32_t stackguard_test;
493 uint32_t tables_id;
494 uint32_t regerror_buffsize;
495 uint8_t locale[LOCALESIZE];
496 } patctl;
497
498 #define MAXCPYGET 10
499 #define LENCPYGET 64
500
501 typedef struct datctl { /* Structure for data line modifiers. */
502 uint32_t options; /* Must be in same position as patctl */
503 uint32_t control; /* Must be in same position as patctl */
504 uint32_t control2; /* Must be in same position as patctl */
505 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
506 uint32_t cerror[2];
507 uint32_t cfail[2];
508 int32_t callout_data;
509 int32_t copy_numbers[MAXCPYGET];
510 int32_t get_numbers[MAXCPYGET];
511 uint32_t jitstack;
512 uint32_t oveccount;
513 uint32_t offset;
514 uint8_t copy_names[LENCPYGET];
515 uint8_t get_names[LENCPYGET];
516 } datctl;
517
518 /* Ids for which context to modify. */
519
enum {
  CTX_PAT,      /* The active pattern context */
  CTX_POPPAT,   /* The active pattern context, when a pattern was popped */
  CTX_DEFPAT,   /* The default pattern context */
  CTX_DAT,      /* The active data (match) context */
  CTX_DEFDAT    /* The default data (match) context */
};
525
526 /* Macros to simplify the big table below. */
527
528 #define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
529 #define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
530 #define PO(name) offsetof(patctl, name)
531 #define PD(name) PO(name)
532 #define DO(name) offsetof(datctl, name)
533
534 /* Table of all long-form modifiers. Must be in collating sequence of modifier
535 name because it is searched by binary chop. */
536
537 typedef struct modstruct {
538 const char *name;
539 uint16_t which;
540 uint16_t type;
541 uint32_t value;
542 PCRE2_SIZE offset;
543 } modstruct;
544
545 static modstruct modlist[] = {
546 { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
547 { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
548 { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
549 { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
550 { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
551 { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
552 { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
553 { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
554 { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
555 { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
556 { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
557 { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
558 { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
559 { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
560 { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
561 { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
562 { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
563 { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
564 { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
565 { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
566 { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
567 { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
568 { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
569 { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
570 { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
571 { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
572 { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
573 { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
574 { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
575 { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
576 { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
577 { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
578 { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
579 { "framesize", MOD_PAT, MOD_CTL, CTL_FRAMESIZE, PO(control) },
580 { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
581 { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
582 { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
583 { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
584 { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
585 { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
586 { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
587 { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
588 { "jitstack", MOD_DAT, MOD_INT, 0, DO(jitstack) },
589 { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
590 { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
591 { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
592 { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
593 { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
594 { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
595 { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
596 { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
597 { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
598 { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
599 { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
600 { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
601 { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
602 { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
603 { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
604 { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) },
605 { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
606 { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
607 { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
608 { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
609 { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
610 { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
611 { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
612 { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
613 { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
614 { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
615 { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
616 { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
617 { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
618 { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
619 { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
620 { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
621 { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
622 { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
623 { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
624 { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
625 { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */
626 { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
627 { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
628 { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
629 { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
630 { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
631 { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
632 { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
633 { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
634 { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
635 { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
636 { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
637 { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
638 { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
639 { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
640 { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
641 { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
642 { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
643 };
644
/* Number of entries in modlist. The expansion is parenthesized so that it
behaves as a single operand in any surrounding expression. */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
646
647 /* Controls and options that are supported for use with the POSIX interface. */
648
649 #define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
650 PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_UCP|PCRE2_UTF| \
651 PCRE2_UNGREEDY)
652
653 #define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
654 CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_POSIX|CTL_POSIX_NOSUB)
655
656 #define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)
657
658 #define POSIX_SUPPORTED_MATCH_OPTIONS ( \
659 PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL)
660
661 #define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT)
662 #define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)
663
664 /* Control bits that are not ignored with 'push'. */
665
666 #define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
667 CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
668 CTL_JITVERIFY|CTL_MEMORY|CTL_FRAMESIZE|CTL_PUSH|CTL_PUSHCOPY| \
669 CTL_PUSHTABLESCOPY|CTL_USE_LENGTH)
670
671 #define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL_BSR_SET|CTL_NL_SET)
672
673 /* Controls that apply only at compile time with 'push'. */
674
675 #define PUSH_COMPILE_ONLY_CONTROLS CTL_JITVERIFY
676 #define PUSH_COMPILE_ONLY_CONTROLS2 (0)
677
678 /* Controls that are forbidden with #pop or #popcopy. */
679
680 #define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
681 CTL_PUSHCOPY|CTL_PUSHTABLESCOPY|CTL_USE_LENGTH)
682
683 /* Pattern controls that are mutually exclusive. At present these are all in
684 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
685 CTL_POSIX, so it doesn't need its own entries. */
686
687 static uint32_t exclusive_pat_controls[] = {
688 CTL_POSIX | CTL_HEXPAT,
689 CTL_POSIX | CTL_PUSH,
690 CTL_POSIX | CTL_PUSHCOPY,
691 CTL_POSIX | CTL_PUSHTABLESCOPY,
692 CTL_POSIX | CTL_USE_LENGTH,
693 CTL_EXPAND | CTL_HEXPAT };
694
695 /* Data controls that are mutually exclusive. At present these are all in the
696 first control word. */
697
698 static uint32_t exclusive_dat_controls[] = {
699 CTL_ALLUSEDTEXT | CTL_STARTCHAR,
700 CTL_FINDLIMITS | CTL_NULLCONTEXT };
701
702 /* Table of single-character abbreviated modifiers. The index field is
703 initialized to -1, but the first time the modifier is encountered, it is filled
704 in with the index of the full entry in modlist, to save repeated searching when
705 processing multiple test items. This short list is searched serially, so its
706 order does not matter. */
707
/* One single-character modifier abbreviation: the full modifier name, the
abbreviating character, and a cached index that starts out as -1. */

typedef struct c1modstruct {
  const char *fullname;
  uint32_t onechar;
  int index;
} c1modstruct;

static c1modstruct c1modlist[] = {
  { "bincode",   'B', -1 },
  { "info",      'I', -1 },
  { "global",    'g', -1 },
  { "caseless",  'i', -1 },
  { "multiline", 'm', -1 },
  { "dotall",    's', -1 },
  { "extended",  'x', -1 }
};

/* Number of entries in c1modlist. The expansion is parenthesized so that it
behaves as a single operand in any surrounding expression. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
725
726 /* Table of arguments for the -C command line option. Use macros to make the
727 table itself easier to read. */
728
729 #if defined SUPPORT_PCRE2_8
730 #define SUPPORT_8 1
731 #endif
732 #if defined SUPPORT_PCRE2_16
733 #define SUPPORT_16 1
734 #endif
735 #if defined SUPPORT_PCRE2_32
736 #define SUPPORT_32 1
737 #endif
738
739 #ifndef SUPPORT_8
740 #define SUPPORT_8 0
741 #endif
742 #ifndef SUPPORT_16
743 #define SUPPORT_16 0
744 #endif
745 #ifndef SUPPORT_32
746 #define SUPPORT_32 0
747 #endif
748
749 #ifdef EBCDIC
750 #define SUPPORT_EBCDIC 1
751 #define EBCDIC_NL CHAR_LF
752 #else
753 #define SUPPORT_EBCDIC 0
754 #define EBCDIC_NL 0
755 #endif
756
757 #ifdef NEVER_BACKSLASH_C
758 #define BACKSLASH_C 0
759 #else
760 #define BACKSLASH_C 1
761 #endif
762
/* Kinds of entry in the table of -C arguments. */

enum {
  CONF_BSR,   /* = 0 */
  CONF_FIX,   /* = 1 */
  CONF_FIZ,   /* = 2 */
  CONF_INT,   /* = 3 */
  CONF_NL     /* = 4 */
};

/* One -C argument: its name, its CONF_xxx kind, and an associated value. */

typedef struct coptstruct {
  const char *name;
  uint32_t    type;
  uint32_t    value;
} coptstruct;
775
776 static coptstruct coptlist[] = {
777 { "backslash-C", CONF_FIX, BACKSLASH_C },
778 { "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
779 { "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
780 { "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
781 { "jit", CONF_INT, PCRE2_CONFIG_JIT },
782 { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE },
783 { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE },
784 { "pcre2-16", CONF_FIX, SUPPORT_16 },
785 { "pcre2-32", CONF_FIX, SUPPORT_32 },
786 { "pcre2-8", CONF_FIX, SUPPORT_8 },
787 { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
788 };
789
/* Number of entries in coptlist. Parenthesized so that the internal division
cannot re-associate with operators surrounding a use of the macro. */

#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))
791
792 #undef SUPPORT_8
793 #undef SUPPORT_16
794 #undef SUPPORT_32
795 #undef SUPPORT_EBCDIC
796
797
798 /* ----------------------- Static variables ------------------------ */
799
800 static FILE *infile;
801 static FILE *outfile;
802
803 static const void *last_callout_mark;
804 static PCRE2_JIT_STACK *jit_stack = NULL;
805 static size_t jit_stack_size = 0;
806
807 static BOOL first_callout;
808 static BOOL jit_was_used;
809 static BOOL restrict_for_perl_test = FALSE;
810 static BOOL show_memory = FALSE;
811
812 static int code_unit_size; /* Bytes */
813 static int jitrc; /* Return from JIT compile */
814 static int test_mode = DEFAULT_TEST_MODE;
815 static int timeit = 0;
816 static int timeitm = 0;
817
/* Three clock_t accumulators, all starting at zero. They are given internal
linkage like every other variable in this "Static variables" section, so that
they do not leak into the global namespace. */

static clock_t total_compile_time = 0;
static clock_t total_jit_compile_time = 0;
static clock_t total_match_time = 0;
821
822 static uint32_t dfa_matched;
823 static uint32_t forbid_utf = 0;
824 static uint32_t maxlookbehind;
825 static uint32_t max_oveccount;
826 static uint32_t callout_count;
827
828 static uint16_t local_newline_default = 0;
829
830 static VERSION_TYPE jittarget[VERSION_SIZE];
831 static VERSION_TYPE version[VERSION_SIZE];
832 static VERSION_TYPE uversion[VERSION_SIZE];
833
834 static patctl def_patctl;
835 static patctl pat_patctl;
836 static datctl def_datctl;
837 static datctl dat_datctl;
838
839 static void *patstack[PATSTACKSIZE];
840 static int patstacknext = 0;
841
842 #ifdef SUPPORT_PCRE2_8
843 static regex_t preg = { NULL, NULL, 0, 0, 0 };
844 #endif
845
846 static int *dfa_workspace = NULL;
847 static const uint8_t *locale_tables = NULL;
848 static const uint8_t *use_tables = NULL;
849 static uint8_t locale_name[32];
850
851 /* We need buffers for building 16/32-bit strings; 8-bit strings don't need
852 rebuilding, but set up the same naming scheme for use in macros. The "buffer"
853 buffer is where all input lines are read. Its size is the same as pbuffer8.
854 Pattern lines are always copied to pbuffer8 for use in callouts, even if they
855 are actually compiled from pbuffer16 or pbuffer32. */
856
857 static size_t pbuffer8_size = 50000; /* Initial size, bytes */
858 static uint8_t *pbuffer8 = NULL;
859 static uint8_t *buffer = NULL;
860
861 /* The dbuffer is where all processed data lines are put. In non-8-bit modes it
862 is cast as needed. For long data lines it grows as necessary. */
863
864 static size_t dbuffer_size = 1u << 14; /* Initial size, bytes */
865 static uint8_t *dbuffer = NULL;
866
867
868 /* ---------------- Mode-dependent variables -------------------*/
869
870 #ifdef SUPPORT_PCRE2_8
871 static pcre2_code_8 *compiled_code8;
872 static pcre2_general_context_8 *general_context8, *general_context_copy8;
873 static pcre2_compile_context_8 *pat_context8, *default_pat_context8;
874 static pcre2_match_context_8 *dat_context8, *default_dat_context8;
875 static pcre2_match_data_8 *match_data8;
876 #endif
877
878 #ifdef SUPPORT_PCRE2_16
879 static pcre2_code_16 *compiled_code16;
880 static pcre2_general_context_16 *general_context16, *general_context_copy16;
881 static pcre2_compile_context_16 *pat_context16, *default_pat_context16;
882 static pcre2_match_context_16 *dat_context16, *default_dat_context16;
883 static pcre2_match_data_16 *match_data16;
884 static PCRE2_SIZE pbuffer16_size = 0; /* Set only when needed */
885 static uint16_t *pbuffer16 = NULL;
886 #endif
887
888 #ifdef SUPPORT_PCRE2_32
889 static pcre2_code_32 *compiled_code32;
890 static pcre2_general_context_32 *general_context32, *general_context_copy32;
891 static pcre2_compile_context_32 *pat_context32, *default_pat_context32;
892 static pcre2_match_context_32 *dat_context32, *default_dat_context32;
893 static pcre2_match_data_32 *match_data32;
894 static PCRE2_SIZE pbuffer32_size = 0; /* Set only when needed */
895 static uint32_t *pbuffer32 = NULL;
896 #endif
897
898
899 /* ---------------- Macros that work in all modes ----------------- */
900
901 #define CAST8VAR(x) CASTVAR(uint8_t *, x)
902 #define SET(x,y) SETOP(x,y,=)
903 #define SETPLUS(x,y) SETOP(x,y,+=)
904 #define strlen8(x) strlen((char *)x)
905
906
907 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
908
909 /* Define macros for variables and functions that must be selected dynamically
910 depending on the mode setting (8, 16, 32). These are dependent on which modes
911 are supported. */
912
913 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
914 defined (SUPPORT_PCRE2_32)) >= 2
915
916 /* ----- All three modes supported ----- */
917
918 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
919
920 #define CASTFLD(t,a,b) ((test_mode == PCRE8_MODE)? (t)(G(a,8)->b) : \
921 (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : (t)(G(a,32)->b))
922
923 #define CASTVAR(t,x) ( \
924 (test_mode == PCRE8_MODE)? (t)G(x,8) : \
925 (test_mode == PCRE16_MODE)? (t)G(x,16) : (t)G(x,32))
926
927 #define CODE_UNIT(a,b) ( \
928 (test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \
929 (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
930 (uint32_t)(((PCRE2_SPTR32)(a))[b]))
931
932 #define DATCTXCPY(a,b) \
933 if (test_mode == PCRE8_MODE) \
934 memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
935 else if (test_mode == PCRE16_MODE) \
936 memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \
937 else memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
938
939 #define FLD(a,b) ((test_mode == PCRE8_MODE)? G(a,8)->b : \
940 (test_mode == PCRE16_MODE)? G(a,16)->b : G(a,32)->b)
941
942 #define PATCTXCPY(a,b) \
943 if (test_mode == PCRE8_MODE) \
944 memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \
945 else if (test_mode == PCRE16_MODE) \
946 memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \
947 else memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
948
949 #define PCHARS(lv, p, offset, len, utf, f) \
950 if (test_mode == PCRE32_MODE) \
951 lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
952 else if (test_mode == PCRE16_MODE) \
953 lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
954 else \
955 lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
956
957 #define PCHARSV(p, offset, len, utf, f) \
958 if (test_mode == PCRE32_MODE) \
959 (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
960 else if (test_mode == PCRE16_MODE) \
961 (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
962 else \
963 (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
964
965 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
966 if (test_mode == PCRE8_MODE) \
967 a = pcre2_callout_enumerate_8(compiled_code8, \
968 (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
969 else if (test_mode == PCRE16_MODE) \
970 a = pcre2_callout_enumerate_16(compiled_code16, \
971 (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
972 else \
973 a = pcre2_callout_enumerate_32(compiled_code32, \
974 (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
975
976 #define PCRE2_CODE_COPY_FROM_VOID(a,b) \
977 if (test_mode == PCRE8_MODE) \
978 G(a,8) = pcre2_code_copy_8(b); \
979 else if (test_mode == PCRE16_MODE) \
980 G(a,16) = pcre2_code_copy_16(b); \
981 else \
982 G(a,32) = pcre2_code_copy_32(b)
983
984 #define PCRE2_CODE_COPY_TO_VOID(a,b) \
985 if (test_mode == PCRE8_MODE) \
986 a = (void *)pcre2_code_copy_8(G(b,8)); \
987 else if (test_mode == PCRE16_MODE) \
988 a = (void *)pcre2_code_copy_16(G(b,16)); \
989 else \
990 a = (void *)pcre2_code_copy_32(G(b,32))
991
992 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
993 if (test_mode == PCRE8_MODE) \
994 a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
995 else if (test_mode == PCRE16_MODE) \
996 a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
997 else \
998 a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
999
1000 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
1001 if (test_mode == PCRE8_MODE) \
1002 G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
1003 else if (test_mode == PCRE16_MODE) \
1004 G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \
1005 else \
1006 G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
1007
1008 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
1009 if (test_mode == PCRE8_MODE) \
1010 a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
1011 else if (test_mode == PCRE16_MODE) \
1012 a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
1013 else \
1014 a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
1015
/* Fetch an error message into the buffer whose name is formed from b plus the
mode suffix. The length argument is the matching <b><mode>_size variable,
divided by 2 or 4 in the 16-bit and 32-bit modes (presumably converting a byte
count into code units - confirm against the callers). */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_get_error_message_8(a, G(b,8), G(G(b,8),_size)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_get_error_message_16(a, G(b,16), G(G(b,16),_size)/2); \
  else \
    r = pcre2_get_error_message_32(a, G(b,32), G(G(b,32),_size)/4)
1023
1024 #define PCRE2_GET_OVECTOR_COUNT(a,b) \
1025 if (test_mode == PCRE8_MODE) \
1026 a = pcre2_get_ovector_count_8(G(b,8)); \
1027 else if (test_mode == PCRE16_MODE) \
1028 a = pcre2_get_ovector_count_16(G(b,16)); \
1029 else \
1030 a = pcre2_get_ovector_count_32(G(b,32))
1031
1032 #define PCRE2_GET_STARTCHAR(a,b) \
1033 if (test_mode == PCRE8_MODE) \
1034 a = pcre2_get_startchar_8(G(b,8)); \
1035 else if (test_mode == PCRE16_MODE) \
1036 a = pcre2_get_startchar_16(G(b,16)); \
1037 else \
1038 a = pcre2_get_startchar_32(G(b,32))
1039
1040 #define PCRE2_JIT_COMPILE(r,a,b) \
1041 if (test_mode == PCRE8_MODE) r = pcre2_jit_compile_8(G(a,8),b); \
1042 else if (test_mode == PCRE16_MODE) r = pcre2_jit_compile_16(G(a,16),b); \
1043 else r = pcre2_jit_compile_32(G(a,32),b)
1044
1045 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
1046 if (test_mode == PCRE8_MODE) pcre2_jit_free_unused_memory_8(G(a,8)); \
1047 else if (test_mode == PCRE16_MODE) pcre2_jit_free_unused_memory_16(G(a,16)); \
1048 else pcre2_jit_free_unused_memory_32(G(a,32))
1049
1050 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
1051 if (test_mode == PCRE8_MODE) \
1052 a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
1053 else if (test_mode == PCRE16_MODE) \
1054 a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
1055 else \
1056 a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1057
/* Create a JIT stack with the function that matches the current test mode and
store the result in a. Like the other dispatch macros, the expansion has no
trailing semicolon: the caller supplies it, so the macro acts as exactly one
statement and can be followed by an else. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
1065
/* Call the JIT stack assignment function for the current test mode on the
context named a plus the mode suffix. The expansion deliberately ends without
a semicolon so that "PCRE2_JIT_STACK_ASSIGN(...);" is a single statement. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
1073
/* Free a JIT stack with the function for the current test mode, casting a to
that mode's stack type. No trailing semicolon is included; the caller's own
semicolon terminates the statement. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1081
1082 #define PCRE2_MAKETABLES(a) \
1083 if (test_mode == PCRE8_MODE) a = pcre2_maketables_8(NULL); \
1084 else if (test_mode == PCRE16_MODE) a = pcre2_maketables_16(NULL); \
1085 else a = pcre2_maketables_32(NULL)
1086
1087 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
1088 if (test_mode == PCRE8_MODE) \
1089 a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
1090 else if (test_mode == PCRE16_MODE) \
1091 a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
1092 else \
1093 a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1094
1095 #define PCRE2_MATCH_DATA_CREATE(a,b,c) \
1096 if (test_mode == PCRE8_MODE) \
1097 G(a,8) = pcre2_match_data_create_8(b,c); \
1098 else if (test_mode == PCRE16_MODE) \
1099 G(a,16) = pcre2_match_data_create_16(b,c); \
1100 else \
1101 G(a,32) = pcre2_match_data_create_32(b,c)
1102
1103 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
1104 if (test_mode == PCRE8_MODE) \
1105 G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \
1106 else if (test_mode == PCRE16_MODE) \
1107 G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c); \
1108 else \
1109 G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
1110
1111 #define PCRE2_MATCH_DATA_FREE(a) \
1112 if (test_mode == PCRE8_MODE) \
1113 pcre2_match_data_free_8(G(a,8)); \
1114 else if (test_mode == PCRE16_MODE) \
1115 pcre2_match_data_free_16(G(a,16)); \
1116 else \
1117 pcre2_match_data_free_32(G(a,32))
1118
1119 #define PCRE2_PATTERN_INFO(a,b,c,d) \
1120 if (test_mode == PCRE8_MODE) \
1121 a = pcre2_pattern_info_8(G(b,8),c,d); \
1122 else if (test_mode == PCRE16_MODE) \
1123 a = pcre2_pattern_info_16(G(b,16),c,d); \
1124 else \
1125 a = pcre2_pattern_info_32(G(b,32),c,d)
1126
1127 #define PCRE2_PRINTINT(a) \
1128 if (test_mode == PCRE8_MODE) \
1129 pcre2_printint_8(compiled_code8,outfile,a); \
1130 else if (test_mode == PCRE16_MODE) \
1131 pcre2_printint_16(compiled_code16,outfile,a); \
1132 else \
1133 pcre2_printint_32(compiled_code32,outfile,a)
1134
1135 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
1136 if (test_mode == PCRE8_MODE) \
1137 r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
1138 else if (test_mode == PCRE16_MODE) \
1139 r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
1140 else \
1141 r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
1142
1143 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
1144 if (test_mode == PCRE8_MODE) \
1145 r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
1146 else if (test_mode == PCRE16_MODE) \
1147 r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
1148 else \
1149 r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
1150
1151 #define PCRE2_SERIALIZE_FREE(a) \
1152 if (test_mode == PCRE8_MODE) \
1153 pcre2_serialize_free_8(a); \
1154 else if (test_mode == PCRE16_MODE) \
1155 pcre2_serialize_free_16(a); \
1156 else \
1157 pcre2_serialize_free_32(a)
1158
/* Set r from the serialize_get_number_of_codes function for the current test
mode. The expansion ends without a semicolon or a line continuation, matching
the other dispatch macros, so a use followed by ";" is a single statement. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1166
/* Call the set_callout function for the current test mode on the context
named a plus the mode suffix, casting b to that mode's callout function
pointer type. There is no trailing semicolon in the expansion; the caller
provides it. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1174
1175 #define PCRE2_SET_CHARACTER_TABLES(a,b) \
1176 if (test_mode == PCRE8_MODE) \
1177 pcre2_set_character_tables_8(G(a,8),b); \
1178 else if (test_mode == PCRE16_MODE) \
1179 pcre2_set_character_tables_16(G(a,16),b); \
1180 else \
1181 pcre2_set_character_tables_32(G(a,32),b)
1182
1183 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
1184 if (test_mode == PCRE8_MODE) \
1185 pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
1186 else if (test_mode == PCRE16_MODE) \
1187 pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
1188 else \
1189 pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
1190
1191 #define PCRE2_SET_DEPTH_LIMIT(a,b) \
1192 if (test_mode == PCRE8_MODE) \
1193 pcre2_set_depth_limit_8(G(a,8),b); \
1194 else if (test_mode == PCRE16_MODE) \
1195 pcre2_set_depth_limit_16(G(a,16),b); \
1196 else \
1197 pcre2_set_depth_limit_32(G(a,32),b)
1198
1199 #define PCRE2_SET_MATCH_LIMIT(a,b) \
1200 if (test_mode == PCRE8_MODE) \
1201 pcre2_set_match_limit_8(G(a,8),b); \
1202 else if (test_mode == PCRE16_MODE) \
1203 pcre2_set_match_limit_16(G(a,16),b); \
1204 else \
1205 pcre2_set_match_limit_32(G(a,32),b)
1206
1207 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
1208 if (test_mode == PCRE8_MODE) \
1209 pcre2_set_max_pattern_length_8(G(a,8),b); \
1210 else if (test_mode == PCRE16_MODE) \
1211 pcre2_set_max_pattern_length_16(G(a,16),b); \
1212 else \
1213 pcre2_set_max_pattern_length_32(G(a,32),b)
1214
1215 #define PCRE2_SET_OFFSET_LIMIT(a,b) \
1216 if (test_mode == PCRE8_MODE) \
1217 pcre2_set_offset_limit_8(G(a,8),b); \
1218 else if (test_mode == PCRE16_MODE) \
1219 pcre2_set_offset_limit_16(G(a,16),b); \
1220 else \
1221 pcre2_set_offset_limit_32(G(a,32),b)
1222
1223 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
1224 if (test_mode == PCRE8_MODE) \
1225 pcre2_set_parens_nest_limit_8(G(a,8),b); \
1226 else if (test_mode == PCRE16_MODE) \
1227 pcre2_set_parens_nest_limit_16(G(a,16),b); \
1228 else \
1229 pcre2_set_parens_nest_limit_32(G(a,32),b)
1230
1231 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
1232 if (test_mode == PCRE8_MODE) \
1233 a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
1234 (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
1235 else if (test_mode == PCRE16_MODE) \
1236 a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
1237 (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
1238 else \
1239 a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
1240 (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
1241
1242 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
1243 if (test_mode == PCRE8_MODE) \
1244 a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
1245 else if (test_mode == PCRE16_MODE) \
1246 a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
1247 else \
1248 a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
1249
1250 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
1251 if (test_mode == PCRE8_MODE) \
1252 a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
1253 else if (test_mode == PCRE16_MODE) \
1254 a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
1255 else \
1256 a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
1257
1258 #define PCRE2_SUBSTRING_FREE(a) \
1259 if (test_mode == PCRE8_MODE) pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
1260 else if (test_mode == PCRE16_MODE) \
1261 pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
1262 else pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
1263
1264 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
1265 if (test_mode == PCRE8_MODE) \
1266 a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
1267 else if (test_mode == PCRE16_MODE) \
1268 a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
1269 else \
1270 a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
1271
1272 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
1273 if (test_mode == PCRE8_MODE) \
1274 a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
1275 else if (test_mode == PCRE16_MODE) \
1276 a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
1277 else \
1278 a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
1279
1280 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
1281 if (test_mode == PCRE8_MODE) \
1282 a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \
1283 else if (test_mode == PCRE16_MODE) \
1284 a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \
1285 else \
1286 a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
1287
1288 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
1289 if (test_mode == PCRE8_MODE) \
1290 a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \
1291 else if (test_mode == PCRE16_MODE) \
1292 a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \
1293 else \
1294 a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
1295
1296 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
1297 if (test_mode == PCRE8_MODE) \
1298 a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \
1299 else if (test_mode == PCRE16_MODE) \
1300 a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \
1301 else \
1302 a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
1303
1304 #define PCRE2_SUBSTRING_LIST_FREE(a) \
1305 if (test_mode == PCRE8_MODE) \
1306 pcre2_substring_list_free_8((PCRE2_SPTR8 *)a); \
1307 else if (test_mode == PCRE16_MODE) \
1308 pcre2_substring_list_free_16((PCRE2_SPTR16 *)a); \
1309 else \
1310 pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
1311
1312 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
1313 if (test_mode == PCRE8_MODE) \
1314 a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \
1315 else if (test_mode == PCRE16_MODE) \
1316 a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \
1317 else \
1318 a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
1319
1320 #define PTR(x) ( \
1321 (test_mode == PCRE8_MODE)? (void *)G(x,8) : \
1322 (test_mode == PCRE16_MODE)? (void *)G(x,16) : \
1323 (void *)G(x,32))
1324
1325 #define SETFLD(x,y,z) \
1326 if (test_mode == PCRE8_MODE) G(x,8)->y = z; \
1327 else if (test_mode == PCRE16_MODE) G(x,16)->y = z; \
1328 else G(x,32)->y = z
1329
1330 #define SETFLDVEC(x,y,v,z) \
1331 if (test_mode == PCRE8_MODE) G(x,8)->y[v] = z; \
1332 else if (test_mode == PCRE16_MODE) G(x,16)->y[v] = z; \
1333 else G(x,32)->y[v] = z
1334
1335 #define SETOP(x,y,z) \
1336 if (test_mode == PCRE8_MODE) G(x,8) z y; \
1337 else if (test_mode == PCRE16_MODE) G(x,16) z y; \
1338 else G(x,32) z y
1339
1340 #define SETCASTPTR(x,y) \
1341 if (test_mode == PCRE8_MODE) \
1342 G(x,8) = (uint8_t *)(y); \
1343 else if (test_mode == PCRE16_MODE) \
1344 G(x,16) = (uint16_t *)(y); \
1345 else \
1346 G(x,32) = (uint32_t *)(y)
1347
1348 #define STRLEN(p) ((test_mode == PCRE8_MODE)? ((int)strlen((char *)p)) : \
1349 (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \
1350 ((int)strlen32((PCRE2_SPTR32)p)))
1351
1352 #define SUB1(a,b) \
1353 if (test_mode == PCRE8_MODE) G(a,8)(G(b,8)); \
1354 else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16)); \
1355 else G(a,32)(G(b,32))
1356
1357 #define SUB2(a,b,c) \
1358 if (test_mode == PCRE8_MODE) G(a,8)(G(b,8),G(c,8)); \
1359 else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16),G(c,16)); \
1360 else G(a,32)(G(b,32),G(c,32))
1361
1362 #define TEST(x,r,y) ( \
1363 (test_mode == PCRE8_MODE && G(x,8) r (y)) || \
1364 (test_mode == PCRE16_MODE && G(x,16) r (y)) || \
1365 (test_mode == PCRE32_MODE && G(x,32) r (y)))
1366
1367 #define TESTFLD(x,f,r,y) ( \
1368 (test_mode == PCRE8_MODE && G(x,8)->f r (y)) || \
1369 (test_mode == PCRE16_MODE && G(x,16)->f r (y)) || \
1370 (test_mode == PCRE32_MODE && G(x,32)->f r (y)))
1371
1372
1373 /* ----- Two out of three modes are supported ----- */
1374
1375 #else
1376
1377 /* We can use some macro trickery to make a single set of definitions work in
1378 the three different cases. */
1379
1380 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
1381
/* BITONE is the wider and BITTWO the narrower of the two supported code unit
widths. The 32-bit width is chosen as BITONE only when a second width is
available alongside it; in every other case the pair is 16 and 8. */

#if defined(SUPPORT_PCRE2_32) && \
    (defined(SUPPORT_PCRE2_16) || defined(SUPPORT_PCRE2_8))
#define BITONE 32
#if defined(SUPPORT_PCRE2_16)
#define BITTWO 16    /* 32-bit and 16-bit but not 8-bit supported */
#else
#define BITTWO 8     /* 32-bit and 8-bit but not 16-bit supported */
#endif
#else
#define BITONE 16    /* 16-bit and 8-bit but not 32-bit supported */
#define BITTWO 8
#endif
1398
1399
1400 /* ----- Common macros for two-mode cases ----- */
1401
1402 #define BYTEONE (BITONE/8)
1403 #define BYTETWO (BITTWO/8)
1404
1405 #define CASTFLD(t,a,b) \
1406 ((test_mode == G(G(PCRE,BITONE),_MODE))? (t)(G(a,BITONE)->b) : \
1407 (t)(G(a,BITTWO)->b))
1408
1409 #define CASTVAR(t,x) ( \
1410 (test_mode == G(G(PCRE,BITONE),_MODE))? \
1411 (t)G(x,BITONE) : (t)G(x,BITTWO))
1412
1413 #define CODE_UNIT(a,b) ( \
1414 (test_mode == G(G(PCRE,BITONE),_MODE))? \
1415 (uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) : \
1416 (uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b]))
1417
1418 #define DATCTXCPY(a,b) \
1419 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1420 memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \
1421 else \
1422 memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_match_context_,BITTWO)))
1423
1424 #define FLD(a,b) \
1425 ((test_mode == G(G(PCRE,BITONE),_MODE))? G(a,BITONE)->b : G(a,BITTWO)->b)
1426
1427 #define PATCTXCPY(a,b) \
1428 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1429 memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_compile_context_,BITONE))); \
1430 else \
1431 memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO)))
1432
1433 #define PCHARS(lv, p, offset, len, utf, f) \
1434 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1435 lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
1436 else \
1437 lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1438
1439 #define PCHARSV(p, offset, len, utf, f) \
1440 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1441 (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
1442 else \
1443 (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1444
/* Enumerate the callouts of the compiled pattern for the current test mode,
storing the result in a. The function name stem must end in an underscore so
that pasting the width gives pcre2_callout_enumerate_<width>, the same name
that the three-mode version of this macro calls. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
    a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1452
1453 #define PCRE2_CODE_COPY_FROM_VOID(a,b) \
1454 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1455 G(a,BITONE) = G(pcre2_code_copy_,BITONE)(b); \
1456 else \
1457 G(a,BITTWO) = G(pcre2_code_copy_,BITTWO)(b)
1458
1459 #define PCRE2_CODE_COPY_TO_VOID(a,b) \
1460 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1461 a = (void *)G(pcre2_code_copy_,BITONE)(G(b,BITONE)); \
1462 else \
1463 a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO))
1464
1465 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
1466 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1467 a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \
1468 else \
1469 a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO))
1470
1471 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
1472 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1473 G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \
1474 else \
1475 G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,g)
1476
1477 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
1478 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1479 a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1480 G(g,BITONE),h,i,j); \
1481 else \
1482 a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1483 G(g,BITTWO),h,i,j)
1484
1485 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
1486 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1487 r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE),G(G(b,BITONE),_size/BYTEONE)); \
1488 else \
1489 r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size/BYTETWO))
1490
1491 #define PCRE2_GET_OVECTOR_COUNT(a,b) \
1492 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1493 a = G(pcre2_get_ovector_count_,BITONE)(G(b,BITONE)); \
1494 else \
1495 a = G(pcre2_get_ovector_count_,BITTWO)(G(b,BITTWO))
1496
1497 #define PCRE2_GET_STARTCHAR(a,b) \
1498 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1499 a = G(pcre2_get_startchar_,BITONE)(G(b,BITONE)); \
1500 else \
1501 a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO))
1502
1503 #define PCRE2_JIT_COMPILE(r,a,b) \
1504 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1505 r = G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \
1506 else \
1507 r = G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b)
1508
1509 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
1510 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1511 G(pcre2_jit_free_unused_memory_,BITONE)(G(a,BITONE)); \
1512 else \
1513 G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO))
1514
1515 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
1516 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1517 a = G(pcre2_jit_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1518 G(g,BITONE),h); \
1519 else \
1520 a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1521 G(g,BITTWO),h)
1522
/* Two-mode version: create a JIT stack with the function for the current test
mode and store it in a. The expansion ends without a semicolon or a line
continuation, so a use followed by ";" is exactly one statement. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d)
1528
/* Two-mode version: call the JIT stack assignment function for the current
test mode on the context named a plus the width suffix. No trailing semicolon
is part of the expansion; the caller supplies it. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c)
1534
/* Two-mode version: free a JIT stack with the function for the current test
mode, casting a to that width's stack type. The expansion has no trailing
semicolon, so it can be used as a single statement before an else. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a)
1540
1541 #define PCRE2_MAKETABLES(a) \
1542 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1543 a = G(pcre2_maketables_,BITONE)(NULL); \
1544 else \
1545 a = G(pcre2_maketables_,BITTWO)(NULL)
1546
1547 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
1548 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1549 a = G(pcre2_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1550 G(g,BITONE),h); \
1551 else \
1552 a = G(pcre2_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1553 G(g,BITTWO),h)
1554
1555 #define PCRE2_MATCH_DATA_CREATE(a,b,c) \
1556 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1557 G(a,BITONE) = G(pcre2_match_data_create_,BITONE)(b,c); \
1558 else \
1559 G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,c)
1560
1561 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
1562 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1563 G(a,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),c); \
1564 else \
1565 G(a,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)(G(b,BITTWO),c)
1566
1567 #define PCRE2_MATCH_DATA_FREE(a) \
1568 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1569 G(pcre2_match_data_free_,BITONE)(G(a,BITONE)); \
1570 else \
1571 G(pcre2_match_data_free_,BITTWO)(G(a,BITTWO))
1572
1573 #define PCRE2_PATTERN_INFO(a,b,c,d) \
1574 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1575 a = G(pcre2_pattern_info_,BITONE)(G(b,BITONE),c,d); \
1576 else \
1577 a = G(pcre2_pattern_info_,BITTWO)(G(b,BITTWO),c,d)
1578
1579 #define PCRE2_PRINTINT(a) \
1580 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1581 G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,a); \
1582 else \
1583 G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,a)
1584
1585 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
1586 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1587 r = G(pcre2_serialize_decode_,BITONE)((G(pcre2_code_,BITONE) **)a,b,c,G(d,BITONE)); \
1588 else \
1589 r = G(pcre2_serialize_decode_,BITTWO)((G(pcre2_code_,BITTWO) **)a,b,c,G(d,BITTWO))
1590
/* Two-mode version: serialize a list of compiled patterns with the function
for the current test mode, storing the result in r. The first argument is cast
to a pointer to pointer to const code of the selected width; the context is the
variable named e plus the width suffix. */

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_serialize_encode_,BITONE)( \
      (const G(pcre2_code_,BITONE) **)a,b,c,d,G(e,BITONE)); \
  else \
    r = G(pcre2_serialize_encode_,BITTWO)( \
      (const G(pcre2_code_,BITTWO) **)a,b,c,d,G(e,BITTWO))
1596
/* Two-mode wrapper for pcre2_serialize_free. Only the function name depends on
the current test_mode; the argument is passed on exactly as given. */

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(bytes); \
  else \
    G(pcre2_serialize_free_,BITTWO)(bytes)
1602
/* Two-mode wrapper for pcre2_serialize_get_number_of_codes. The function's
return value is assigned to "rc"; "bytes" is passed on exactly as given. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_get_number_of_codes_,BITONE)(bytes); \
  else \
    rc = G(pcre2_serialize_get_number_of_codes_,BITTWO)(bytes)
1608
/* Two-mode wrapper for pcre2_set_callout. "ctx" is an unsuffixed context
variable name that gets the width suffix of the current test_mode. "fn" is cast
to the callout function pointer type for that width, and "data" is passed on
unchanged.

The expansion deliberately does not end in a semicolon: the caller's own ";"
completes the statement. A semicolon inside the macro would leave an empty
statement after the call and break "if (x) PCRE2_SET_CALLOUT(...); else ...". */

#define PCRE2_SET_CALLOUT(ctx,fn,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(ctx,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))fn,data); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(ctx,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))fn,data)
1616
/* Two-mode wrapper for pcre2_set_character_tables. "ctx" is an unsuffixed
context variable name that gets the width suffix of the current test_mode;
"tables" is passed on unchanged. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)(G(ctx,BITONE),tables); \
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(ctx,BITTWO),tables)
1622
/* Two-mode wrapper for pcre2_set_compile_recursion_guard. "ctx" is an
unsuffixed context variable name that gets the width suffix of the current
test_mode; "guard" and "data" are passed on unchanged. */

#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(ctx,BITONE),guard,data); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(ctx,BITTWO),guard,data)
1628
/* Two-mode wrapper for pcre2_set_depth_limit. "ctx" is an unsuffixed context
variable name that gets the width suffix of the current test_mode; "limit" is
passed on unchanged. */

#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_depth_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_depth_limit_,BITTWO)(G(ctx,BITTWO),limit)
1634
/* Two-mode wrapper for pcre2_set_match_limit. "ctx" is an unsuffixed context
variable name that gets the width suffix of the current test_mode; "limit" is
passed on unchanged. */

#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_match_limit_,BITTWO)(G(ctx,BITTWO),limit)
1640
/* Two-mode wrapper for pcre2_set_max_pattern_length. "ctx" is an unsuffixed
context variable name that gets the width suffix of the current test_mode;
"length" is passed on unchanged. */

#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,length) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)(G(ctx,BITONE),length); \
  else \
    G(pcre2_set_max_pattern_length_,BITTWO)(G(ctx,BITTWO),length)
1646
/* Two-mode wrapper for pcre2_set_offset_limit. "ctx" is an unsuffixed context
variable name that gets the width suffix of the current test_mode; "limit" is
passed on unchanged. */

#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_offset_limit_,BITTWO)(G(ctx,BITTWO),limit)
1652
/* Two-mode wrapper for pcre2_set_parens_nest_limit. "ctx" is an unsuffixed
context variable name that gets the width suffix of the current test_mode;
"limit" is passed on unchanged. */

#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_parens_nest_limit_,BITTWO)(G(ctx,BITTWO),limit)
1658
/* Two-mode wrapper for pcre2_substitute; the result goes into "rc". "code",
"md" and "mctx" are unsuffixed variable names that get the width suffix of the
current test_mode. "subj" and "repl" are cast to the PCRE2_SPTR type for that
width and "out" to a pointer to its PCRE2_UCHAR type. All other arguments are
passed on unchanged. */

#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,md,mctx,repl,rlen,out,outlen) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substitute_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(md,BITONE),G(mctx,BITONE), \
      (G(PCRE2_SPTR,BITONE))repl,rlen, \
      (G(PCRE2_UCHAR,BITONE) *)out,outlen); \
  else \
    rc = G(pcre2_substitute_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(md,BITTWO),G(mctx,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))repl,rlen, \
      (G(PCRE2_UCHAR,BITTWO) *)out,outlen)
1668
/* Two-mode wrapper for pcre2_substring_copy_byname; the result goes into
"rc". "md" and "name" are unsuffixed variable names that get the width suffix
of the current test_mode, "buf" is cast to a pointer to the PCRE2_UCHAR type
for that width, and "buflen" is passed on unchanged. */

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,md,name,buf,buflen) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_byname_,BITONE)( \
      G(md,BITONE),G(name,BITONE),(G(PCRE2_UCHAR,BITONE) *)buf,buflen); \
  else \
    rc = G(pcre2_substring_copy_byname_,BITTWO)( \
      G(md,BITTWO),G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) *)buf,buflen)
1676
/* Two-mode wrapper for pcre2_substring_copy_bynumber; the result goes into
"rc". "md" is an unsuffixed variable name that gets the width suffix of the
current test_mode, "buf" is cast to a pointer to the PCRE2_UCHAR type for that
width, and "num" and "buflen" are passed on unchanged. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,md,num,buf,buflen) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_bynumber_,BITONE)( \
      G(md,BITONE),num,(G(PCRE2_UCHAR,BITONE) *)buf,buflen); \
  else \
    rc = G(pcre2_substring_copy_bynumber_,BITTWO)( \
      G(md,BITTWO),num,(G(PCRE2_UCHAR,BITTWO) *)buf,buflen)
1684
/* Two-mode wrapper for pcre2_substring_free. The argument is cast to a pointer
to the PCRE2_UCHAR type of the current test_mode before being handed to the
function for that width. */

#define PCRE2_SUBSTRING_FREE(buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)buf); \
  else \
    G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)buf)
1689
/* Two-mode wrapper for pcre2_substring_get_byname; the result goes into "rc".
"md" and "name" are unsuffixed variable names that get the width suffix of the
current test_mode, "bufptr" is cast to a pointer to pointer to the PCRE2_UCHAR
type for that width, and "lenptr" is passed on unchanged. */

#define PCRE2_SUBSTRING_GET_BYNAME(rc,md,name,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_byname_,BITONE)( \
      G(md,BITONE),G(name,BITONE),(G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_byname_,BITTWO)( \
      G(md,BITTWO),G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)
1697
/* Two-mode wrapper for pcre2_substring_get_bynumber; the result goes into
"rc". "md" is an unsuffixed variable name that gets the width suffix of the
current test_mode, "bufptr" is cast to a pointer to pointer to the PCRE2_UCHAR
type for that width, and "num" and "lenptr" are passed on unchanged. */

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,md,num,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_bynumber_,BITONE)( \
      G(md,BITONE),num,(G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_bynumber_,BITTWO)( \
      G(md,BITTWO),num,(G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)
1705
/* Two-mode wrapper for pcre2_substring_length_byname; the result goes into
"rc". "md" and "name" are unsuffixed variable names that get the width suffix
of the current test_mode; "lenptr" is passed on unchanged. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,md,name,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_byname_,BITONE)( \
      G(md,BITONE),G(name,BITONE),lenptr); \
  else \
    rc = G(pcre2_substring_length_byname_,BITTWO)( \
      G(md,BITTWO),G(name,BITTWO),lenptr)
1711
/* Two-mode wrapper for pcre2_substring_length_bynumber; the result goes into
"rc". "md" is an unsuffixed variable name that gets the width suffix of the
current test_mode; "num" and "lenptr" are passed on unchanged. */

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,md,num,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_bynumber_,BITONE)(G(md,BITONE),num,lenptr); \
  else \
    rc = G(pcre2_substring_length_bynumber_,BITTWO)(G(md,BITTWO),num,lenptr)
1717
/* Two-mode wrapper for pcre2_substring_list_get; the result goes into "rc".
"md" is an unsuffixed variable name that gets the width suffix of the current
test_mode, "listptr" is cast to a triple pointer to the PCRE2_UCHAR type for
that width, and "lensptr" is passed on unchanged. */

#define PCRE2_SUBSTRING_LIST_GET(rc,md,listptr,lensptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_list_get_,BITONE)( \
      G(md,BITONE),(G(PCRE2_UCHAR,BITONE) ***)listptr,lensptr); \
  else \
    rc = G(pcre2_substring_list_get_,BITTWO)( \
      G(md,BITTWO),(G(PCRE2_UCHAR,BITTWO) ***)listptr,lensptr)
1725
/* Two-mode wrapper for pcre2_substring_list_free. The argument is cast to a
pointer to the PCRE2_SPTR type of the current test_mode before being handed to
the function for that width. */

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)list); \
  else \
    G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)list)
1731
/* Two-mode wrapper for pcre2_substring_number_from_name; the result goes into
"rc". Both "code" and "name" are unsuffixed variable names that get the width
suffix of the current test_mode. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_number_from_name_,BITONE)( \
      G(code,BITONE),G(name,BITONE)); \
  else \
    rc = G(pcre2_substring_number_from_name_,BITTWO)( \
      G(code,BITTWO),G(name,BITTWO))
1737
/* Expression macro: yields, as a void pointer, whichever of the two
width-suffixed variables named by "var" belongs to the current test_mode. */

#define PTR(var) \
  ((test_mode == G(G(PCRE,BITONE),_MODE)) \
    ? (void *)G(var,BITONE) \
    : (void *)G(var,BITTWO))
1741
/* Assign "val" to field "fld" of the structure pointed to by the
width-suffixed variant of "blk" that belongs to the current test_mode. */

#define SETFLD(blk,fld,val) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(blk,BITONE)->fld = val; \
  else \
    G(blk,BITTWO)->fld = val
1745
/* Assign "val" to element "idx" of the array field "fld" in the structure
pointed to by the width-suffixed variant of "blk" that belongs to the current
test_mode. */

#define SETFLDVEC(blk,fld,idx,val) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(blk,BITONE)->fld[idx] = val; \
  else \
    G(blk,BITTWO)->fld[idx] = val
1749
/* Apply an operator to the width-suffixed variant of "var" that belongs to the
current test_mode. Note the argument order: the operand comes second and the
operator token third, so the expansion reads "<var><width> op operand". */

#define SETOP(var,operand,op) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) op operand; \
  else \
    G(var,BITTWO) op operand
1753
/* Store "src" in the width-suffixed variant of "dst" that belongs to the
current test_mode, after casting it to a pointer to the unsigned integer type
of that width (the type name is built as uint<width>_t). */

#define SETCASTPTR(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(dst,BITONE) = (G(G(uint,BITONE),_t) *)(src); \
  else \
    G(dst,BITTWO) = (G(G(uint,BITTWO),_t) *)(src)
1759
/* Expression macro: calls strlen<width> for the current test_mode on the
argument, which is first cast to the PCRE2_SPTR type of that width. */

#define STRLEN(str) \
  ((test_mode == G(G(PCRE,BITONE),_MODE)) \
    ? G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))str) \
    : G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))str))
1763
/* Call a one-argument function whose name and whose argument are both
unsuffixed here; the width suffix of the current test_mode is glued onto each
before the call is made. */

#define SUB1(fn,arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(fn,BITONE)(G(arg,BITONE)); \
  else \
    G(fn,BITTWO)(G(arg,BITTWO))
1769
/* Call a two-argument function whose name and whose arguments are all
unsuffixed here; the width suffix of the current test_mode is glued onto each
before the call is made. This is the two-argument companion of SUB1 and has
the same shape as the single-mode SUB2 definitions.

The function name must be followed directly by the argument list: a stray ")"
after G(fn,...) unbalances the parentheses and makes every use a syntax
error. */

#define SUB2(fn,arg1,arg2) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(fn,BITONE)(G(arg1,BITONE),G(arg2,BITONE)); \
  else \
    G(fn,BITTWO)(G(arg1,BITTWO),G(arg2,BITTWO))
1775
/* Expression macro: true when the width-suffixed variant of "var" that
belongs to the current test_mode stands in relation "rel" to (val). Each mode
is tested explicitly, so the result is false if test_mode is neither of the
two compiled-in modes. */

#define TEST(var,rel,val) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && G(var,BITONE) rel (val)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && G(var,BITTWO) rel (val)))
1779
/* Expression macro: true when field "fld" of the structure pointed to by the
width-suffixed variant of "blk" for the current test_mode stands in relation
"rel" to (val). Each mode is tested explicitly, so the result is false if
test_mode is neither of the two compiled-in modes. */

#define TESTFLD(blk,fld,rel,val) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && G(blk,BITONE)->fld rel (val)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && G(blk,BITTWO)->fld rel (val)))
1783
1784
1785 #endif /* Two out of three modes */
1786
1787 /* ----- End of cases where more than one mode is supported ----- */
1788
1789
1790 /* ----- Only 8-bit mode is supported ----- */
1791
1792 #elif defined SUPPORT_PCRE2_8
/* With only the 8-bit library compiled in, no test on test_mode is needed:
each wrapper expands directly to the "_8" function, and G(x,8) forms the
"8"-suffixed variable name. Statement-like wrappers are written without a
terminating semicolon, so that a use followed by ";" is exactly one statement
and remains valid in an unbraced if/else. */

#define CASTFLD(t,a,b) (t)(G(a,8)->b)
#define CASTVAR(t,x) (t)G(x,8)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b])
#define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8))
#define FLD(a,b) G(a,8)->b
#define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8))
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a,8),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)

/* NOTE(review): unlike the other wrappers, the three JIT stack macros below
carry a trailing semicolon of their own. They are left untouched pending a
check that every caller supplies one. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a);
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_8(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_8(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_8(a)
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
    (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_8(G(b,8),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_8((PCRE2_SPTR8 *)a)
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
#define PTR(x) (void *)G(x,8)
#define SETFLD(x,y,z) G(x,8)->y = z
#define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z
#define SETOP(x,y,z) G(x,8) z y
#define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y)
#define STRLEN(p) (int)strlen((char *)p)
#define SUB1(a,b) G(a,8)(G(b,8))
#define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8))
#define TEST(x,r,y) (G(x,8) r (y))
#define TESTFLD(x,f,r,y) (G(x,8)->f r (y))
1884
1885
1886 /* ----- Only 16-bit mode is supported ----- */
1887
1888 #elif defined SUPPORT_PCRE2_16
/* With only the 16-bit library compiled in, no test on test_mode is needed:
each wrapper expands directly to the "_16" function, and G(x,16) forms the
"16"-suffixed variable name. Statement-like wrappers are written without a
terminating semicolon, so that a use followed by ";" is exactly one statement
and remains valid in an unbraced if/else. */

#define CASTFLD(t,a,b) (t)(G(a,16)->b)
#define CASTVAR(t,x) (t)G(x,16)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
#define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
#define FLD(a,b) G(a,16)->b
#define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_16(compiled_code16, \
    (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)

/* NOTE(review): unlike the other wrappers, the three JIT stack macros below
carry a trailing semicolon of their own. They are left untouched pending a
check that every caller supplies one. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a);
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_16(a)
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
    (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_16(G(b,16),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_16((PCRE2_SPTR16 *)a)
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_16(G(b,16),G(c,16))
#define PTR(x) (void *)G(x,16)
#define SETFLD(x,y,z) G(x,16)->y = z
#define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
#define SETOP(x,y,z) G(x,16) z y
#define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
#define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
#define SUB1(a,b) G(a,16)(G(b,16))
#define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
#define TEST(x,r,y) (G(x,16) r (y))
#define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
1980
1981
1982 /* ----- Only 32-bit mode is supported ----- */
1983
1984 #elif defined SUPPORT_PCRE2_32
/* With only the 32-bit library compiled in, no test on test_mode is needed:
each wrapper expands directly to the "_32" function, and G(x,32) forms the
"32"-suffixed variable name. Statement-like wrappers are written without a
terminating semicolon, so that a use followed by ";" is exactly one statement
and remains valid in an unbraced if/else. */

#define CASTFLD(t,a,b) (t)(G(a,32)->b)
#define CASTVAR(t,x) (t)G(x,32)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_32(compiled_code32, \
    (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)

/* NOTE(review): unlike the other wrappers, the three JIT stack macros below
carry a trailing semicolon of their own. They are left untouched pending a
check that every caller supplies one. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_32(a)
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
    (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
#define PTR(x) (void *)G(x,32)
#define SETFLD(x,y,z) G(x,32)->y = z
#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
#define SETOP(x,y,z) G(x,32) z y
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
#define SUB1(a,b) G(a,32)(G(b,32))
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
#define TEST(x,r,y) (G(x,32) r (y))
#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2076
2077 #endif
2078
2079 /* ----- End of mode-specific function call macros ----- */
2080
2081
2082
2083
2084 /*************************************************
2085 * Alternate character tables *
2086 *************************************************/
2087
2088 /* By default, the "tables" pointer in the compile context when calling
2089 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2090 library. However, the tables modifier can be used to select alternate sets of
2091 tables, for different kinds of testing. Note that the locale modifier also
2092 adjusts the tables. */
2093
2094 /* This is the set of tables distributed as default with PCRE2. It recognizes
2095 only ASCII characters. */
2096
/* Layout of tables1, 1088 bytes in all, in this order:
     256 bytes  lower casing table, indexed by character value
     256 bytes  case flipping table, indexed by character value
     320 bytes  ten 32-byte character class bit maps
     256 bytes  character type flags, indexed by character value
*/
2097 static const uint8_t tables1[] = {
2098
2099 /* This table is a lower casing table. A-Z (65-90) map to a-z (97-122); every
other value maps to itself. */
2100
2101 0, 1, 2, 3, 4, 5, 6, 7,
2102 8, 9, 10, 11, 12, 13, 14, 15,
2103 16, 17, 18, 19, 20, 21, 22, 23,
2104 24, 25, 26, 27, 28, 29, 30, 31,
2105 32, 33, 34, 35, 36, 37, 38, 39,
2106 40, 41, 42, 43, 44, 45, 46, 47,
2107 48, 49, 50, 51, 52, 53, 54, 55,
2108 56, 57, 58, 59, 60, 61, 62, 63,
2109 64, 97, 98, 99,100,101,102,103,
2110 104,105,106,107,108,109,110,111,
2111 112,113,114,115,116,117,118,119,
2112 120,121,122, 91, 92, 93, 94, 95,
2113 96, 97, 98, 99,100,101,102,103,
2114 104,105,106,107,108,109,110,111,
2115 112,113,114,115,116,117,118,119,
2116 120,121,122,123,124,125,126,127,
2117 128,129,130,131,132,133,134,135,
2118 136,137,138,139,140,141,142,143,
2119 144,145,146,147,148,149,150,151,
2120 152,153,154,155,156,157,158,159,
2121 160,161,162,163,164,165,166,167,
2122 168,169,170,171,172,173,174,175,
2123 176,177,178,179,180,181,182,183,
2124 184,185,186,187,188,189,190,191,
2125 192,193,194,195,196,197,198,199,
2126 200,201,202,203,204,205,206,207,
2127 208,209,210,211,212,213,214,215,
2128 216,217,218,219,220,221,222,223,
2129 224,225,226,227,228,229,230,231,
2130 232,233,234,235,236,237,238,239,
2131 240,241,242,243,244,245,246,247,
2132 248,249,250,251,252,253,254,255,
2133
2134 /* This table is a case flipping table. A-Z map to a-z and a-z map to A-Z;
every other value maps to itself. */
2135
2136 0, 1, 2, 3, 4, 5, 6, 7,
2137 8, 9, 10, 11, 12, 13, 14, 15,
2138 16, 17, 18, 19, 20, 21, 22, 23,
2139 24, 25, 26, 27, 28, 29, 30, 31,
2140 32, 33, 34, 35, 36, 37, 38, 39,
2141 40, 41, 42, 43, 44, 45, 46, 47,
2142 48, 49, 50, 51, 52, 53, 54, 55,
2143 56, 57, 58, 59, 60, 61, 62, 63,
2144 64, 97, 98, 99,100,101,102,103,
2145 104,105,106,107,108,109,110,111,
2146 112,113,114,115,116,117,118,119,
2147 120,121,122, 91, 92, 93, 94, 95,
2148 96, 65, 66, 67, 68, 69, 70, 71,
2149 72, 73, 74, 75, 76, 77, 78, 79,
2150 80, 81, 82, 83, 84, 85, 86, 87,
2151 88, 89, 90,123,124,125,126,127,
2152 128,129,130,131,132,133,134,135,
2153 136,137,138,139,140,141,142,143,
2154 144,145,146,147,148,149,150,151,
2155 152,153,154,155,156,157,158,159,
2156 160,161,162,163,164,165,166,167,
2157 168,169,170,171,172,173,174,175,
2158 176,177,178,179,180,181,182,183,
2159 184,185,186,187,188,189,190,191,
2160 192,193,194,195,196,197,198,199,
2161 200,201,202,203,204,205,206,207,
2162 208,209,210,211,212,213,214,215,
2163 216,217,218,219,220,221,222,223,
2164 224,225,226,227,228,229,230,231,
2165 232,233,234,235,236,237,238,239,
2166 240,241,242,243,244,245,246,247,
2167 248,249,250,251,252,253,254,255,
2168
2169 /* This table contains bit maps for various character classes. Each map is 32
2170 bytes long and the bits run from the least significant end of each byte, so
character c is bit (c & 7) of byte (c >> 3) within a map. The
2171 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
2172 graph, print, punct, and cntrl, in that order. Other classes are built from
combinations. */
2173
/* space: 9-13 and 32 */
2174 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
2175 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2176 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2177 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2178
/* xdigit: 0-9, A-F, a-f */
2179 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2180 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
2181 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2182 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2183
/* digit: 0-9 */
2184 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2185 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2186 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2187 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2188
/* upper: A-Z */
2189 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2190 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
2191 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2192 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2193
/* lower: a-z */
2194 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2195 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
2196 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2197 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2198
/* word: 0-9, A-Z, '_', a-z */
2199 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2200 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
2201 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2202 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2203
/* graph: 33-126 */
2204 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
2205 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
2206 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2207 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2208
/* print: 32-126 */
2209 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
2210 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
2211 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2212 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2213
/* punct: 33-47, 58-64, 91-96, 123-126 */
2214 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
2215 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
2216 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2217 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2218
/* cntrl: 0-31 and 127 */
2219 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
2220 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
2221 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2222 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2223
2224 /* This table identifies various classes of character by individual bits:
2225 0x01 white space character
2226 0x02 letter
2227 0x04 decimal digit
2228 0x08 hexadecimal digit
2229 0x10 alphanumeric or '_'
2230 0x80 regular expression metacharacter or binary zero
The trailing comment on each row gives the character range it covers.
2231 */
2232
2233 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
2234 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
2235 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
2236 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
2237 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
2238 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
2239 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
2240 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
2241 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
2242 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
2243 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
2244 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
2245 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
2246 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
2247 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
2248 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
2249 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
2250 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
2251 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
2252 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
2253 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
2254 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
2255 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
2256 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
2257 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
2258 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
2259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
2260 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
2261 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
2262 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
2263 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
2264 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2265
2266 /* This is a set of tables that came originally from a Windows user. It seems
2267 to be at least an approximation of ISO 8859. In particular, there are
2268 characters greater than 128 that are marked as spaces, letters, etc. */
2269
static const uint8_t tables2[] = {

  /* Bytes 0-255: lower-casing map. Each entry is the lower-case equivalent of
  its index; 'A'-'Z' map to 'a'-'z' and 192-222 (except 215) map to 224-254. */

  0,1,2,3,4,5,6,7,
  8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,
  24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,
  40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,
  56,57,58,59,60,61,62,63,
  64,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,91,92,93,94,95,
  96,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,
  248,249,250,251,252,253,254,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

  /* Bytes 256-511: case-flipping map. Upper-case codes give their lower-case
  partner and vice versa; everything else maps to itself. */

  0,1,2,3,4,5,6,7,
  8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,
  24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,
  40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,
  56,57,58,59,60,61,62,63,
  64,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,91,92,93,94,95,
  96,65,66,67,68,69,70,71,
  72,73,74,75,76,77,78,79,
  80,81,82,83,84,85,86,87,
  88,89,90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,
  248,249,250,251,252,253,254,223,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,247,
  216,217,218,219,220,221,222,255,

  /* Bytes 512-831: ten 32-byte bitmaps, one bit per character code (bit n%8
  of byte n/8). The class names below are read off the bits that are set and
  follow what is presumably PCRE2's usual class order - confirm against the
  code that consumes these tables. */

  /* white space */
  0,62,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,
  32,0,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,

  /* hexadecimal digits */
  0,0,0,0,0,0,255,3,
  126,0,0,0,126,0,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,

  /* decimal digits */
  0,0,0,0,0,0,255,3,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,12,2,
  0,0,0,0,0,0,0,0,

  /* upper-case letters */
  0,0,0,0,0,0,0,0,
  254,255,255,7,0,0,0,0,
  0,0,0,0,0,0,0,0,
  255,255,127,127,0,0,0,0,

  /* lower-case letters */
  0,0,0,0,0,0,0,0,
  0,0,0,0,254,255,255,7,
  0,0,0,0,0,4,32,4,
  0,0,0,128,255,255,127,255,

  /* "word" characters (alphanumerics and '_') */
  0,0,0,0,0,0,255,3,
  254,255,255,135,254,255,255,7,
  0,0,0,0,0,4,44,6,
  255,255,127,255,255,255,127,255,

  /* graphic characters */
  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,255,

  /* printing characters */
  0,2,0,0,255,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,255,255,255,255,
  255,255,255,255,255,255,255,255,

  /* punctuation */
  0,0,0,0,254,255,0,252,
  1,0,0,248,1,0,0,120,
  0,0,0,0,254,255,255,255,
  0,0,128,0,0,0,128,0,

  /* control characters */
  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,128,
  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,0,

  /* Bytes 832-1087: per-character type flags. The values use the same bit
  assignments as the preceding table in this file (1 = white space,
  2 = letter, 4 = decimal digit, 8 = hexadecimal digit, 16 = alphanumeric or
  '_', 128 = regular expression metacharacter or binary zero). */

  128,0,0,0,0,0,0,0,
  0,1,1,0,1,1,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  1,0,0,0,128,0,0,0,
  128,128,128,128,0,0,128,0,
  28,28,28,28,28,28,28,28,
  28,28,0,0,0,0,0,128,
  0,26,26,26,26,26,26,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,128,128,0,128,16,
  0,26,26,26,26,26,26,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,128,128,0,0,0,
  0,0,0,0,0,1,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  1,0,0,0,0,0,0,0,
  0,0,18,0,0,0,0,0,
  0,0,20,20,0,18,0,0,
  0,20,18,0,0,0,0,0,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,0,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,0,
  18,18,18,18,18,18,18,18
};
2408
2409
2410 #ifndef HAVE_STRERROR
2411 /*************************************************
2412 * Provide strerror() for non-ANSI libraries *
2413 *************************************************/
2414
2415 /* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
2416 libraries. They may no longer be around, but just in case, we can try to
2417 provide the same facility by this simple alternative function. */
2418
/* The system's table of error messages and the number of entries in it. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  /* Only indices 0 .. sys_nerr-1 are looked up in the table; anything else
  gets a fixed fallback text. */

  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
2428 #endif /* HAVE_STRERROR */
2429
2430
2431
2432 /*************************************************
2433 * Local memory functions *
2434 *************************************************/
2435
2436 /* Alternative memory functions, to test functionality. */
2437
2438 static void *my_malloc(size_t size, void *data)
2439 {
2440 void *block = malloc(size);
2441 (void)data;
2442 if (show_memory)
2443 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2444 return block;
2445 }
2446
2447 static void my_free(void *block, void *data)
2448 {
2449 (void)data;
2450 if (show_memory)
2451 fprintf(outfile, "free %p\n", block);
2452 free(block);
2453 }
2454
2455
2456
2457 /*************************************************
2458 * Callback function for stack guard *
2459 *************************************************/
2460
2461 /* This is set up to be called from pcre2_compile() when the stackguard=n
2462 modifier sets a value greater than zero. The test we do is whether the
2463 parenthesis nesting depth is greater than the value set by the modifier.
2464
2465 Argument: the current parenthesis nesting depth
2466 Returns: non-zero to kill the compilation
2467 */
2468
2469 static int
2470 stack_guard(uint32_t depth, void *user_data)
2471 {
2472 (void)user_data;
2473 return depth > pat_patctl.stackguard_test;
2474 }
2475
2476
2477 /*************************************************
2478 * JIT memory callback *
2479 *************************************************/
2480
2481 static PCRE2_JIT_STACK*
2482 jit_callback(void *arg)
2483 {
2484 jit_was_used = TRUE;
2485 return (PCRE2_JIT_STACK *)arg;
2486 }
2487
2488
2489 /*************************************************
2490 * Convert UTF-8 character to code point *
2491 *************************************************/
2492
2493 /* This function reads one or more bytes that represent a UTF-8 character,
2494 and returns the codepoint of that character. Note that the function supports
2495 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2496 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2497 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2498 checking, and also for generating 32-bit non-UTF data values above the UTF
2499 limit.
2500
2501 Argument:
2502 utf8bytes a pointer to the byte vector
2503 vptr a pointer to a uint32_t to receive the code point value
2504
2505 Returns: > 0 => the number of bytes consumed
2506 -6 to 0 => malformed UTF-8 character at offset = (-return)
2507 */
2508
2509 static int
2510 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2511 {
2512 uint32_t c = *utf8bytes++;
2513 uint32_t d = c;
2514 int i, j, s;
2515
2516 for (i = -1; i < 6; i++) /* i is number of additional bytes */
2517 {
2518 if ((d & 0x80) == 0) break;
2519 d <<= 1;
2520 }
2521
2522 if (i == -1) { *vptr = c; return 1; } /* ascii character */
2523 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
2524
2525 /* i now has a value in the range 1-5 */
2526
2527 s = 6*i;
2528 d = (c & utf8_table3[i]) << s;
2529
2530 for (j = 0; j < i; j++)
2531 {
2532 c = *utf8bytes++;
2533 if ((c & 0xc0) != 0x80) return -(j+1);
2534 s -= 6;
2535 d |= (c & 0x3f) << s;
2536 }
2537
2538 /* Check that encoding was the correct unique one */
2539
2540 for (j = 0; j < utf8_table1_size; j++)
2541 if (d <= (uint32_t)utf8_table1[j]) break;
2542 if (j != i) return -(i+1);
2543
2544 /* Valid value */
2545
2546 *vptr = d;
2547 return i+1;
2548 }
2549
2550
2551
2552 /*************************************************
2553 * Print one character *
2554 *************************************************/
2555
2556 /* Print a single character either literally, or as a hex escape, and count how
2557 many printed characters are used.
2558
2559 Arguments:
2560 c the character
2561 utf TRUE in UTF mode
2562 f the FILE to print to, or NULL just to count characters
2563
2564 Returns: number of characters written
2565 */
2566
2567 static int
2568 pchar(uint32_t c, BOOL utf, FILE *f)
2569 {
2570 int n = 0;
2571 char tempbuffer[16];
2572
2573 if (PRINTOK(c))
2574 {
2575 if (f != NULL) fprintf(f, "%c", c);
2576 return 1;
2577 }
2578
2579 if (c < 0x100)
2580 {
2581 if (utf)
2582 {
2583 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2584 return 6;
2585 }
2586 else
2587 {
2588 if (f != NULL) fprintf(f, "\\x%02x", c);
2589 return 4;
2590 }
2591 }
2592
2593 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2594 else n = sprintf(tempbuffer, "\\x{%02x}", c);
2595
2596 return n >= 0 ? n : 0;
2597 }
2598
2599
2600
2601 #ifdef SUPPORT_PCRE2_16
2602 /*************************************************
2603 * Find length of 0-terminated 16-bit string *
2604 *************************************************/
2605
2606 static size_t strlen16(PCRE2_SPTR16 p)
2607 {
2608 PCRE2_SPTR16 pp = p;
2609 while (*pp != 0) pp++;
2610 return (int)(pp - p);
2611 }
2612 #endif /* SUPPORT_PCRE2_16 */
2613
2614
2615
2616 #ifdef SUPPORT_PCRE2_32
2617 /*************************************************
2618 * Find length of 0-terminated 32-bit string *
2619 *************************************************/
2620
2621 static size_t strlen32(PCRE2_SPTR32 p)
2622 {
2623 PCRE2_SPTR32 pp = p;
2624 while (*pp != 0) pp++;
2625 return (int)(pp - p);
2626 }
2627 #endif /* SUPPORT_PCRE2_32 */
2628
2629
2630 #ifdef SUPPORT_PCRE2_8
2631 /*************************************************
2632 * Print 8-bit character string *
2633 *************************************************/
2634
2635 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2636 For printing *MARK strings, a negative length is given. If handed a NULL file,
2637 just counts chars without printing (because pchar() does that). */
2638
2639 static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
2640 {
2641 uint32_t c = 0;
2642 int yield = 0;
2643
2644 if (length < 0) length = p[-1];
2645 while (length-- > 0)
2646 {
2647 if (utf)
2648 {
2649 int rc = utf82ord(p, &c);
2650 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2651 {
2652 length -= rc - 1;
2653 p += rc;
2654 yield += pchar(c, utf, f);
2655 continue;
2656 }
2657 }
2658 c = *p++;
2659 yield += pchar(c, utf, f);
2660 }
2661
2662 return yield;
2663 }
2664 #endif
2665
2666
2667 #ifdef SUPPORT_PCRE2_16
2668 /*************************************************
2669 * Print 16-bit character string *
2670 *************************************************/
2671
2672 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2673 For printing *MARK strings, a negative length is given. If handed a NULL file,
2674 just counts chars without printing. */
2675
2676 static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
2677 {
2678 int yield = 0;
2679 if (length < 0) length = p[-1];
2680 while (length-- > 0)
2681 {
2682 uint32_t c = *p++ & 0xffff;
2683 if (utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2684 {
2685 int d = *p & 0xffff;
2686 if (d >= 0xDC00 && d <= 0xDFFF)
2687 {
2688 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2689 length--;
2690 p++;
2691 }
2692 }
2693 yield += pchar(c, utf, f);
2694 }
2695 return yield;
2696 }
2697 #endif /* SUPPORT_PCRE2_16 */
2698
2699
2700
2701 #ifdef SUPPORT_PCRE2_32
2702 /*************************************************
2703 * Print 32-bit character string *
2704 *************************************************/
2705
2706 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2707 For printing *MARK strings, a negative length is given. If handed a NULL file,
2708 just counts chars without printing. */
2709
2710 static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
2711 {
2712 int yield = 0;
2713 (void)(utf); /* Avoid compiler warning */
2714 if (length < 0) length = p[-1];
2715 while (length-- > 0)
2716 {
2717 uint32_t c = *p++;
2718 yield += pchar(c, utf, f);
2719 }
2720 return yield;
2721 }
2722 #endif /* SUPPORT_PCRE2_32 */
2723
2724
2725
2726
2727 #ifdef SUPPORT_PCRE2_8
2728 /*************************************************
2729 * Convert character value to UTF-8 *
2730 *************************************************/
2731
2732 /* This function takes an integer value in the range 0 - 0x7fffffff
2733 and encodes it as a UTF-8 character in 1 to 6 bytes.
2734
2735 Arguments:
2736 cvalue the character value
2737 utf8bytes pointer to buffer for result - at least 6 bytes long
2738
2739 Returns: number of bytes placed in the buffer, or -1 if cvalue > 0x7fffffff
2740 */
2741
2742 static int
2743 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
2744 {
2745 int i, j;
2746 if (cvalue > 0x7fffffffu)
2747 return -1;
2748 for (i = 0; i < utf8_table1_size; i++)
2749 if (cvalue <= (uint32_t)utf8_table1[i]) break;
2750 utf8bytes += i;
2751 for (j = i; j > 0; j--)
2752 {
2753 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
2754 cvalue >>= 6;
2755 }
2756 *utf8bytes = utf8_table2[i] | cvalue;
2757 return i + 1;
2758 }
2759 #endif /* SUPPORT_PCRE2_8 */
2760
2761
2762
2763 #ifdef SUPPORT_PCRE2_16
2764 /*************************************************
2765 * Convert string to 16-bit *
2766 *************************************************/
2767
2768 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
2769 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
2770 code values from 0 to 0x7fffffff. However, values greater than the later UTF
2771 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
2772 UTF-8 if the utf8_input modifier is set, but an error is generated for values
2773 greater than 0xffff.
2774
2775 If all the input bytes are ASCII, the space needed for a 16-bit string is
2776 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
2777 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
2778 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
2779 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
2780 save repeated re-sizing.
2781
2782 Note that this function does not object to surrogate values. This is
2783 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
2784 for the purpose of testing that they are correctly faulted.
2785
2786 Arguments:
2787 p points to a byte string
2788 utf true in UTF mode
2789 lenptr points to number of bytes in the string (excluding trailing zero)
2790
2791 Returns: 0 on success, with the length updated to the number of 16-bit
2792 data items used (excluding the trailing zero)
2793 OR -1 if a UTF-8 string is malformed
2794 OR -2 if a value > 0x10ffff is encountered in UTF mode
2795 OR -3 if a value > 0xffff is encountered when not in UTF mode
2796 */
2797
2798 static PCRE2_SIZE
2799 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
2800 {
2801 uint16_t *pp;
2802 PCRE2_SIZE len = *lenptr;
2803
2804 if (pbuffer16_size < 2*len + 2)
2805 {
2806 if (pbuffer16 != NULL) free(pbuffer16);
2807 pbuffer16_size = 2*len + 2;
2808 if (pbuffer16_size < 256) pbuffer16_size = 256;
2809 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
2810 if (pbuffer16 == NULL)
2811 {
2812 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n",
2813 (unsigned long int)pbuffer16_size);
2814 exit(1);
2815 }
2816 }
2817
2818 pp = pbuffer16;
2819 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
2820 {
2821 for (; len > 0; len--) *pp++ = *p++;
2822 }
2823 else while (len > 0)
2824 {
2825 uint32_t c;
2826 int chlen = utf82ord(p, &c);
2827 if (chlen <= 0) return -1;
2828 if (!utf && c > 0xffff) return -3;
2829 if (c > 0x10ffff) return -2;
2830 p += chlen;
2831 len -= chlen;
2832 if (c < 0x10000) *pp++ = c; else
2833 {
2834 c -= 0x10000;
2835 *pp++ = 0xD800 | (c >> 10);
2836 *pp++ = 0xDC00 | (c & 0x3ff);
2837 }
2838 }
2839
2840 *pp = 0;
2841 *lenptr = pp - pbuffer16;
2842 return 0;
2843 }
2844 #endif
2845
2846
2847
2848 #ifdef SUPPORT_PCRE2_32
2849 /*************************************************
2850 * Convert string to 32-bit *
2851 *************************************************/
2852
2853 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
2854 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
2855 code values from 0 to 0x7fffffff. However, values greater than the later UTF
2856 limit of 0x10ffff cause an error.
2857
2858 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
2859 is set, and no limit is imposed. There is special interpretation of the 0xff
2860 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
2861 next character to be set. This provides a way of generating 32-bit characters
2862 greater than 0x7fffffff.
2863
2864 If all the input bytes are ASCII, the space needed for a 32-bit string is
2865 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
2866 string is no more than four times, because the number of characters must be
2867 less than the number of bytes. The result is always left in pbuffer32. Impose a
2868 minimum size to save repeated re-sizing.
2869
2870 Note that this function does not object to surrogate values. This is
2871 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
2872 for the purpose of testing that they are correctly faulted.
2873
2874 Arguments:
2875 p points to a byte string
2876 utf true in UTF mode
2877 lenptr points to number of bytes in the string (excluding trailing zero)
2878
2879 Returns: 0 on success, with the length updated to the number of 32-bit
2880 data items used (excluding the trailing zero)
2881 OR -1 if a UTF-8 string is malformed
2882 OR -2 if a value > 0x10ffff is encountered in UTF mode
2883 */
2884
2885 static PCRE2_SIZE
2886 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
2887 {
2888 uint32_t *pp;
2889 PCRE2_SIZE len = *lenptr;
2890
2891 if (pbuffer32_size < 4*len + 4)
2892 {
2893 if (pbuffer32 != NULL) free(pbuffer32);
2894 pbuffer32_size = 4*len + 4;
2895 if (pbuffer32_size < 512) pbuffer32_size = 512;
2896 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
2897 if (pbuffer32 == NULL)
2898 {
2899 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n",
2900 (unsigned long int)pbuffer32_size);
2901 exit(1);
2902 }
2903 }
2904
2905 pp = pbuffer32;
2906
2907 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
2908 {
2909 for (; len > 0; len--) *pp++ = *p++;
2910 }
2911
2912 else while (len > 0)
2913 {
2914 int chlen;
2915 uint32_t c;
2916 uint32_t topbit = 0;
2917 if (!utf && *p == 0xff && len > 1)
2918 {
2919 topbit = 0x80000000u;
2920 p++;
2921 len--;
2922 }
2923 chlen = utf82ord(p, &c);
2924 if (chlen <= 0) return -1;
2925 if (utf && c > 0x10ffff) return -2;
2926 p += chlen;
2927 len -= chlen;
2928 *pp++ = c | topbit;
2929 }
2930
2931 *pp = 0;
2932 *lenptr = pp - pbuffer32;
2933 return 0;
2934 }
2935 #endif /* SUPPORT_PCRE2_32 */
2936
2937
2938
2939 /*************************************************
2940 * Move back by so many characters *
2941 *************************************************/
2942
2943 /* Given a code unit offset in a subject string, move backwards by a number of
2944 characters, and return the resulting offset.
2945
2946 Arguments:
2947 subject pointer to the string
2948 offset start offset
2949 count count to move back by
2950 utf TRUE if in UTF mode
2951
2952 Returns: a possibly changed offset
2953 */
2954
2955 static PCRE2_SIZE
2956 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
2957 {
2958 if (!utf || test_mode == PCRE32_MODE)
2959 return (count >= offset)? 0 : (offset - count);
2960
2961 else if (test_mode == PCRE8_MODE)
2962 {
2963 PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
2964 for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
2965 {
2966 pp--;
2967 while ((*pp & 0xc0) == 0x80) pp--;
2968 }
2969 return pp - (PCRE2_SPTR8)subject;
2970 }
2971
2972 else /* 16-bit mode */
2973 {
2974 PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
2975 for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
2976 {
2977 pp--;
2978 if ((*pp & 0xfc00) == 0xdc00) pp--;
2979 }
2980 return pp - (PCRE2_SPTR16)subject;
2981 }
2982 }
2983
2984
2985
2986 /*************************************************
2987 * Expand input buffers *
2988 *************************************************/
2989
2990 /* This function doubles the size of the input buffer and the buffer for
2991 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
2992 the new ones.
2993
2994 Arguments: none
2995 Returns: nothing (aborts if malloc() fails)
2996 */
2997
2998 static void
2999 expand_input_buffers(void)
3000 {
3001 int new_pbuffer8_size = 2*pbuffer8_size;
3002 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3003 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3004
3005 if (new_buffer == NULL || new_pbuffer8 == NULL)
3006 {
3007 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3008 exit(1);
3009 }
3010
3011 memcpy(new_buffer, buffer, pbuffer8_size);
3012 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3013
3014 pbuffer8_size = new_pbuffer8_size;
3015
3016 free(buffer);
3017 free(pbuffer8);
3018
3019 buffer = new_buffer;
3020 pbuffer8 = new_pbuffer8;
3021 }
3022
3023
3024
3025 /*************************************************
3026 * Read or extend an input line *
3027 *************************************************/
3028
3029 /* Input lines are read into buffer, but both patterns and data lines can be
3030 continued over multiple input lines. In addition, if the buffer fills up, we
3031 want to automatically expand it so as to be able to handle extremely large
3032 lines that are needed for certain stress tests, although this is less likely
3033 now that there are repetition features for both patterns and data. When the
3034 input buffer is expanded, the other two buffers must also be expanded likewise,
3035 and the contents of pbuffer, which are a copy of the input for callouts, must
3036 be preserved (for when expansion happens for a data line). This is not the most
3037 optimal way of handling this, but hey, this is just a test program!
3038
3039 Arguments:
3040 f the file to read
3041 start where in buffer to start (this *must* be within buffer)
3042 prompt for stdin or readline()
3043
3044 Returns: pointer to the start of new data
3045 could be a copy of start, or could be moved
3046 NULL if no data read and EOF reached
3047 */
3048
3049 static uint8_t *
3050 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3051 {
3052 uint8_t *here = start;
3053
3054 for (;;)
3055 {
3056 size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3057
3058 if (rlen > 1000)
3059 {
3060 size_t dlen;
3061
3062 /* If libreadline or libedit support is required, use readline() to read a
3063 line if the input is a terminal. Note that readline() removes the trailing
3064 newline, so we must put it back again, to be compatible with fgets(). */
3065
3066 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3067 if (INTERACTIVE(f))
3068 {
3069 size_t len;
3070 char *s = readline(prompt);
3071 if (s == NULL) return (here == start)? NULL : start;
3072 len = strlen(s);
3073 if (len > 0) add_history(s);
3074 if (len > rlen - 1) len = rlen - 1;
3075 memcpy(here, s, len);
3076 here[len] = '\n';
3077 here[len+1] = 0;
3078 free(s);
3079 }
3080 else
3081 #endif
3082
3083 /* Read the next line by normal means, prompting if the file is a tty. */
3084
3085 {
3086 if (INTERACTIVE(f)) printf("%s", prompt);
3087 if (fgets((char *)here, rlen, f) == NULL)
3088 return (here == start)? NULL : start;
3089 }
3090
3091 dlen = strlen((char *)here);
3092 here += dlen;
3093
3094 /* Check for end of line reached. Take care not to read data from before
3095 start (dlen will be zero for a file starting with a binary zero). */
3096
3097 if (here > start && here[-1] == '\n') return start;
3098
3099 /* If we have not read a newline when reading a file, we have either filled
3100 the buffer or reached the end of the file. We can detect the former by
3101 checking that the string fills the buffer, and the latter by feof(). If
3102 neither of these is true, it means we read a binary zero which has caused
3103 strlen() to give a short length. This is a hard error because pcre2test
3104 expects to work with C strings. */
3105
3106 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3107 {
3108 fprintf(outfile, "** Binary zero encountered in input\n");
3109 fprintf(outfile, "** pcre2test run abandoned\n");
3110 exit(1);
3111 }
3112 }
3113
3114 else
3115 {
3116 size_t start_offset = start - buffer;
3117 size_t here_offset = here - buffer;
3118 expand_input_buffers();
3119 start = buffer + start_offset;
3120 here = buffer + here_offset;
3121 }
3122 }
3123
3124 /* Control never gets here */
3125 }
3126
3127
3128
3129 /*************************************************
3130 * Case-independent strncmp() function *
3131 *************************************************/
3132
3133 /*
3134 Arguments:
3135 s first string
3136 t second string
3137 n number of characters to compare
3138
3139 Returns: < 0, = 0, or > 0, according to the comparison
3140 */
3141
static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
  /* Walk both strings in step for n characters. The first position at which
  the lower-cased characters differ decides the result; a zero byte is not
  treated specially. */

  for (; n != 0; n--, s++, t++)
    {
    int diff = tolower(*s) - tolower(*t);
    if (diff != 0) return diff;
    }

  return 0;
}
3152
3153
3154
3155 /*************************************************
3156 * Scan the main modifier list *
3157 *************************************************/
3158
3159 /* This function searches the modifier list for a long modifier name.
3160
3161 Argument:
3162 p start of the name
3163 len length of the name
3164
3165 Returns: an index in the modifier list, or -1 on failure
3166 */
3167
3168 static int
3169 scan_modifiers(const uint8_t *p, unsigned int len)
3170 {
3171 int bot = 0;
3172 int top = MODLISTCOUNT;
3173
3174 while (top > bot)
3175 {
3176 int mid = (bot + top)/2;
3177 unsigned int mlen = strlen(modlist[mid].name);
3178 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3179 if (c == 0)
3180 {
3181 if (len == mlen) return mid;
3182 c = (int)len - (int)mlen;
3183 }
3184 if (c > 0) bot = mid + 1; else top = mid;
3185 }
3186
3187 return -1;
3188
3189 }
3190
3191
3192
3193 /*************************************************
3194 * Check a modifer and find its field *
3195 *************************************************/
3196
3197 /* This function is called when a modifier has been identified. We check that
3198 it is allowed here and find the field that is to be changed.
3199
3200 Arguments:
3201 m the modifier list entry
3202 ctx CTX_PAT => pattern context
3203 CTX_POPPAT => pattern context for popped pattern
3204 CTX_DEFPAT => default pattern context
3205 CTX_DAT => data context
3206 CTX_DEFDAT => default data context
3207 pctl point to pattern control block
3208 dctl point to data control block
3209 c a single character or 0
3210
3211 Returns: a field pointer or NULL
3212 */
3213
3214 static void *
3215 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3216 {
3217 void *field = NULL;
3218 PCRE2_SIZE offset = m->offset;
3219
3220 if (restrict_for_perl_test) switch(m->which)
3221 {
3222 case MOD_PNDP:
3223 case MOD_PATP:
3224 case MOD_PDP:
3225 break;
3226
3227 default:
3228 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3229 m->name);
3230 return NULL;
3231 }
3232
3233 switch (m->which)
3234 {
3235 case MOD_CTC: /* Compile context modifier */
3236 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3237 else if (ctx == CTX_PAT) field = PTR(pat_context);
3238 break;
3239
3240 case MOD_CTM: /* Match context modifier */
3241 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3242 else if (ctx == CTX_DAT) field = PTR(dat_context);
3243 break;
3244
3245 case MOD_DAT: /* Data line modifier */
3246 if (dctl != NULL) field = dctl;
3247 break;
3248
3249 case MOD_PAT: /* Pattern modifier */
3250 case MOD_PATP: /* Allowed for Perl test */
3251 if (pctl != NULL) field = pctl;
3252 break;
3253
3254 case MOD_PD: /* Pattern or data line modifier */
3255 case MOD_PDP: /* Ditto, allowed for Perl test */
3256 case MOD_PND: /* Ditto, but not default pattern */
3257 case MOD_PNDP: /* Ditto, allowed for Perl test */
3258 if (dctl != NULL) field = dctl;
3259 else if (pctl != NULL && (m->which == MOD_PD || ctx != CTX_DEFPAT))
3260 field = pctl;
3261 break;
3262 }
3263
3264 if (field == NULL)
3265 {
3266 if (c == 0)
3267 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3268 else
3269 fprintf(outfile, "** /%c is not valid here\n", c);
3270 return NULL;
3271 }
3272
3273 return (char *)field + offset;
3274 }
3275
3276
3277
3278 /*************************************************
3279 * Decode a modifier list *
3280 *************************************************/
3281
3282 /* A pointer to a control block is NULL when called in cases when that block is
3283 not relevant. They are never all relevant in one call. At least one of patctl
3284 and datctl is NULL. The second argument specifies which context to use for
3285 modifiers that apply to contexts.
3286
3287 Arguments:
3288 p point to modifier string
3289 ctx CTX_PAT => pattern context
3290 CTX_POPPAT => pattern context for popped pattern
3291 CTX_DEFPAT => default pattern context
3292 CTX_DAT => data context
3293 CTX_DEFDAT => default data context
3294 pctl point to pattern control block
3295 dctl point to data control block
3296
3297 Returns: TRUE if successful decode, FALSE otherwise
3298 */
3299
/* Summary: walk a comma-separated modifier list, one item per pass of the
outer loop, and store each setting through the field pointer that
check_modifier() returns for the given context. The string is edited in place
(trailing white space on the final item is cut off by writing a zero). Parsing
stops at the first error, after a message has been written to outfile. */
3300 static BOOL
3301 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3302 {
3303 uint8_t *ep, *pp;
3304 long li;
3305 unsigned long uli;
/* "first" stays TRUE until a full-name modifier has been fully processed; it
is what permits the single-character abbreviation form below. */
3306 BOOL first = TRUE;
3307
3308 for (;;)
3309 {
3310 void *field;
3311 modstruct *m;
3312 BOOL off = FALSE;
3313 unsigned int i, len;
3314 int index;
3315 char *endptr;
3316
3317 /* Skip white space and commas. */
3318
3319 while (isspace(*p) || *p == ',') p++;
3320 if (*p == 0) break;
3321
3322 /* Find the end of the item; lose trailing whitespace at end of line. */
3323
/* ep ends up on the terminating ',' or on the zero that ends the string. Only
in the latter case is the buffer modified. */
3324 for (ep = p; *ep != 0 && *ep != ','; ep++);
3325 if (*ep == 0)
3326 {
3327 while (ep > p && isspace(ep[-1])) ep--;
3328 *ep = 0;
3329 }
3330
3331 /* Remember if the first character is '-'. */
3332
3333 if (*p == '-')
3334 {
3335 off = TRUE;
3336 p++;
3337 }
3338
3339 /* Find the length of a full-length modifier name, and scan for it. */
3340
/* pp stops at '=' or at ep, so [p, pp) is the candidate name. A negative
result from scan_modifiers() is treated as "not found". */
3341 pp = p;
3342 while (pp < ep && *pp != '=') pp++;
3343 index = scan_modifiers(p, pp - p);
3344
3345 /* If the first modifier is unrecognized, try to interpret it as a sequence
3346 of single-character abbreviated modifiers. None of these modifiers have any
3347 associated data. They just set options or control bits. */
3348
3349 if (index < 0)
3350 {
3351 uint32_t cc;
3352 uint8_t *mp = p;
3353
3354 if (!first)
3355 {
3356 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3357 if (ep - p == 1)
3358 fprintf(outfile, "** Single-character modifiers must come first\n");
3359 return FALSE;
3360 }
3361
/* Each character is looked up in c1modlist. Note that this path only ever ORs
bits in: the "off" flag from a leading '-' is not consulted here. */
3362 for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3363 {
3364 for (i = 0; i < C1MODLISTCOUNT; i++)
3365 if (cc == c1modlist[i].onechar) break;
3366
3367 if (i >= C1MODLISTCOUNT)
3368 {
3369 fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3370 *p, (int)(ep-mp), mp);
3371 return FALSE;
3372 }
3373
3374 if (c1modlist[i].index >= 0)
3375 {
3376 index = c1modlist[i].index;
3377 }
3378
3379 else
3380 {
3381 index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3382 strlen(c1modlist[i].fullname));
3383 if (index < 0)
3384 {
3385 fprintf(outfile, "** Internal error: single-character equivalent "
3386 "modifier '%s' not found\n", c1modlist[i].fullname);
3387 return FALSE;
3388 }
3389 c1modlist[i].index = index; /* Cache for next time */
3390 }
3391
3392 field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3393 if (field == NULL) return FALSE;
3394 *((uint32_t *)field) |= modlist[index].value;
3395 }
3396
/* "first" is deliberately left unchanged on this path; p now rests on the
character that ended the abbreviation run. */
3397 continue; /* With the next (fullname) modifier */
3398 }
3399
3400 /* We have a match on a full-name modifier. Check for the existence of data
3401 when needed. */
3402
/* MOD_IND is the one data type whose "=value" part is optional: without an
'=' it drops through to the on/off syntax check and later takes its default. */
3403 m = modlist + index; /* Save typing */
3404 if (m->type != MOD_CTL && m->type != MOD_OPT &&
3405 (m->type != MOD_IND || *pp == '='))
3406 {
3407 if (*pp++ != '=')
3408 {
3409 fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3410 return FALSE;
3411 }
3412 if (off)
3413 {
3414 fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3415 return FALSE;
3416 }
3417 }
3418
3419 /* These on/off types have no data. */
3420
3421 else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3422 {
3423 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3424 return FALSE;
3425 }
3426
3427 /* Set the data length for those types that have data. Then find the field
3428 that is to be set. If check_modifier() returns NULL, it has already output an
3429 error message. */
3430
3431 len = ep - pp;
3432 field = check_modifier(m, ctx, pctl, dctl, 0);
3433 if (field == NULL) return FALSE;
3434
3435 /* Process according to data type. */
3436
/* Every case must leave pp just past the text it consumed; the check after
the switch then insists that a separator or end of string follows. */
3437 switch (m->type)
3438 {
3439 case MOD_CTL:
3440 case MOD_OPT:
3441 if (off) *((uint32_t *)field) &= ~m->value;
3442 else *((uint32_t *)field) |= m->value;
3443 break;
3444
/* "default" stores the build-time default and clears CTL_BSR_SET; an explicit
"anycrlf" or "unicode" stores that value and sets CTL_BSR_SET. */
3445 case MOD_BSR:
3446 if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3447 {
3448 #ifdef BSR_ANYCRLF
3449 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3450 #else
3451 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3452 #endif
3453 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL_BSR_SET;
3454 else dctl->control2 &= ~CTL_BSR_SET;
3455 }
3456 else
3457 {
3458 if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3459 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3460 else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3461 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3462 else goto INVALID_VALUE;
3463 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL_BSR_SET;
3464 else dctl->control2 |= CTL_BSR_SET;
3465 }
3466 pp = ep;
3467 break;
3468
/* Syntax is "n" or "n:m"; when ":m" is absent the second value is set to 0.
NOTE(review): unlike the first number, the text after ':' is not checked to
start with a digit before strtoul() is called. */
3469 case MOD_IN2: /* One or two unsigned integers */
3470 if (!isdigit(*pp)) goto INVALID_VALUE;
3471 uli = strtoul((const char *)pp, &endptr, 10);
3472 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3473 ((uint32_t *)field)[0] = (uint32_t)uli;
3474 if (*endptr == ':')
3475 {
3476 uli = strtoul((const char *)endptr+1, &endptr, 10);
3477 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3478 ((uint32_t *)field)[1] = (uint32_t)uli;
3479 }
3480 else ((uint32_t *)field)[1] = 0;
3481 pp = (uint8_t *)endptr;
3482 break;
3483
3484 /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3485 less than ULONG_MAX. So first test for overflowing the long int, and then
3486 test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3487
3488 case MOD_SIZ: /* PCRE2_SIZE value */
3489 if (!isdigit(*pp)) goto INVALID_VALUE;
3490 uli = strtoul((const char *)pp, &endptr, 10);
3491 if (uli == ULONG_MAX) goto INVALID_VALUE;
3492 #if ULONG_MAX > PCRE2_SIZE_MAX
3493 if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3494 #endif
3495 *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3496 pp = (uint8_t *)endptr;
3497 break;
3498
/* With no data at all (len == 0) the value comes from m->value; otherwise the
text is parsed exactly as for MOD_INT. */
3499 case MOD_IND: /* Unsigned integer with default */
3500 if (len == 0)
3501 {
3502 *((uint32_t *)field) = (uint32_t)(m->value);
3503 break;
3504 }
3505 /* Fall through */
3506
3507 case MOD_INT: /* Unsigned integer */
3508 if (!isdigit(*pp)) goto INVALID_VALUE;
3509 uli = strtoul((const char *)pp, &endptr, 10);
3510 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3511 *((uint32_t *)field) = (uint32_t)uli;
3512 pp = (uint8_t *)endptr;
3513 break;
3514
3515 case MOD_INS: /* Signed integer */
3516 if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3517 li = strtol((const char *)pp, &endptr, 10);
3518 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3519 *((int32_t *)field) = (int32_t)li;
3520 pp = (uint8_t *)endptr;
3521 break;
3522
/* The value must match one of the newlines[] strings, compared with
strncmpic(). Index 0 stores NEWLINE_DEFAULT and clears CTL_NL_SET; any other
index is stored as is and sets CTL_NL_SET. */
3523 case MOD_NL:
3524 for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3525 if (len == strlen(newlines[i]) &&
3526 strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3527 if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3528 if (i == 0)
3529 {
3530 *((uint16_t *)field) = NEWLINE_DEFAULT;
3531 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL_NL_SET;
3532 else dctl->control2 &= ~CTL_NL_SET;
3533 }
3534 else
3535 {
3536 *((uint16_t *)field) = i;
3537 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL_NL_SET;
3538 else dctl->control2 |= CTL_NL_SET;
3539 }
3540 pp = ep;
3541 break;
3542
/* Numeric form: the field pointer is re-based from m->offset to m->value
(presumably the offset of the companion number list - confirm against the
modlist table). A non-negative number is appended after the existing
non-negative entries; a negative number is written at the start of the list.
In both cases the following slot is set to -1 when there is room. */
3543 case MOD_NN: /* Name or (signed) number; may be several */
3544 if (isdigit(*pp) || *pp == '-')
3545 {
3546 int ct = MAXCPYGET - 1;
3547 int32_t value;
3548 li = strtol((const char *)pp, &endptr, 10);
3549 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3550 value = (int32_t)li;
3551 field = (char *)field - m->offset + m->value; /* Adjust field ptr */
3552 if (value >= 0) /* Add new number */
3553 {
3554 while (*((int32_t *)field) >= 0 && ct-- > 0) /* Skip previous */
3555 field = (char *)field + sizeof(int32_t);
3556 if (ct <= 0)
3557 {
3558 fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3559 return FALSE;
3560 }
3561 }
3562 *((int32_t *)field) = value;
3563 if (ct > 0) ((int32_t *)field)[1] = -1;
3564 pp = (uint8_t *)endptr;
3565 }
3566
3567 /* Multiple strings are put end to end. */
3568
/* Each stored name is zero-terminated and the whole list ends with an extra
zero. An empty name (len == 0) writes both zeros at the start of the field,
which leaves the name list empty. */
3569 else
3570 {
3571 char *nn = (char *)field;
3572 if (len > 0) /* Add new name */
3573 {
3574 if (len > MAX_NAME_SIZE)
3575 {
3576 fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
3577 return FALSE;
3578 }
3579 while (*nn != 0) nn += strlen(nn) + 1;
3580 if (nn + len + 2 - (char *)field > LENCPYGET)
3581 {
3582 fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
3583 m->name);
3584 return FALSE;
3585 }
3586 memcpy(nn, pp, len);
3587 }
3588 nn[len] = 0 ;
3589 nn[len+1] = 0;
3590 pp = ep;
3591 }
3592 break;
3593
/* m->value is used here as the capacity of the destination, including the
terminating zero that is written after the copied text. */
3594 case MOD_STR:
3595 if (len + 1 > m->value)
3596 {
3597 fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
3598 m->name, m->value - 1);
3599 return FALSE;
3600 }
3601 memcpy(field, pp, len);
3602 ((uint8_t *)field)[len] = 0;
3603 pp = ep;
3604 break;
3605 }
3606
3607 if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3608 {
3609 fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
3610 return FALSE;
3611 }
3612
3613 p = pp;
3614 first = FALSE;
3615
/* For a popped pattern the setting has already been stored at this point; the
accumulated pattern controls are then inspected and the item is rejected if
any option, tables id, locale or NOTPOP_CONTROLS bit is now set. */
3616 if (ctx == CTX_POPPAT &&
3617 (pctl->options != 0 ||
3618 pctl->tables_id != 0 ||
3619 pctl->locale[0] != 0 ||
3620 (pctl->control & NOTPOP_CONTROLS) != 0))
3621 {
3622 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3623 return FALSE;
3624 }
3625 }
3626
3627 return TRUE;
3628
/* Shared failure exit for the value parsers above: p and ep still delimit the
item being processed (after any leading '-'). */
3629 INVALID_VALUE:
3630 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
3631 return FALSE;
3632 }
3633
3634
3635 /*************************************************
3636 * Get info from a pattern *
3637 *************************************************/
3638
3639 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
3640 pattern.
3641
3642 Arguments:
3643 what code for the required information
3644 where where to put the answer
3645 unsetok PCRE2_ERROR_UNSET is an "expected" result
3646
3647 Returns: the return from pcre2_pattern_info()
3648 */
3649
3650 static int
3651 pattern_info(int what, void *where, BOOL unsetok)
3652 {
3653 int rc;
3654 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
3655 if (rc >= 0) return 0;
3656 if (rc != PCRE2_ERROR_UNSET || !unsetok)
3657 {
3658 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
3659 what);
3660 if (rc == PCRE2_ERROR_BADMODE)
3661 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3662 "%d-bit mode\n", test_mode,
3663 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
3664 }
3665 return rc;
3666 }
3667
3668
3669
3670 #ifdef SUPPORT_PCRE2_8
3671 /*************************************************
3672 * Show something in a list *
3673 *************************************************/
3674
3675 /* This function just helps to keep the code that uses it tidier. It's used for
3676 various lists of things where there needs to be introductory text before the
3677 first item. As these calls are all in the POSIX-support code, they happen only
3678 when 8-bit mode is supported. */
3679
3680 static void
3681 prmsg(const char **msg, const char *s)
3682 {
3683 fprintf(outfile, "%s %s", *msg, s);
3684 *msg = "";
3685 }
3686 #endif /* SUPPORT_PCRE2_8 */
3687
3688
3689
3690 /*************************************************
3691 * Show control bits *
3692 *************************************************/
3693
3694 /* Called for mutually exclusive controls and for unsupported POSIX controls.
3695 Because the bits are unique, this can be used for both pattern and data control
3696 words.
3697
3698 Arguments:
3699 controls control bits
3700 controls2 more control bits
3701 before text to print before
3702
3703 Returns: nothing
3704 */
3705
3706 static void
3707 show_controls(uint32_t controls, uint32_t controls2, const char *before)
3708 {
3709 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
3710 before,
3711 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
3712 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
3713 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
3714 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
3715 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
3716 ((controls & CTL_BINCODE) != 0)? " bincode" : "",
3717 ((controls2 & CTL_BSR_SET) != 0)? " bsr" : "",
3718 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
3719 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
3720 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
3721 ((controls & CTL_DFA) != 0)? " dfa" : "",
3722 ((controls & CTL_EXPAND) != 0)? " expand" : "",
3723 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
3724 ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
3725 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
3726 ((controls & CTL_GETALL) != 0)? " getall" : "",
3727 ((controls & CTL_GLOBAL) != 0)? " global" : "",
3728 ((controls & CTL_HEXPAT) != 0)? " hex" : "",
3729 ((controls & CTL_INFO) != 0)? " info" : "",
3730 ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
3731 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
3732 ((controls & CTL_MARK) != 0)? " mark" : "",
3733 ((controls & CTL_MEMORY) != 0)? " memory" : "",
3734 ((controls2 & CTL_NL_SET) != 0)? " newline" : "",
3735 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
3736 ((controls & CTL_POSIX) != 0)? " posix" : "",
3737 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
3738 ((controls & CTL_PUSH) != 0)? " push" : "",
3739 ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
3740 ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
3741 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
3742 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
3743 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
3744 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
3745 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
3746 ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
3747 ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
3748 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
3749 }
3750
3751
3752
3753 /*************************************************
3754 * Show compile options *
3755 *************************************************/
3756
3757 /* Called from show_pattern_info() and for unsupported POSIX options.
3758
3759 Arguments:
3760 options an options word
3761 before text to print before
3762 after text to print after
3763
3764 Returns: nothing
3765 */
3766
3767 static void
3768 show_compile_options(uint32_t options, const char *before, const char *after)
3769 {
3770 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
3771 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
3772 before,
3773 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
3774 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
3775 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
3776 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
3777 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
3778 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
3779 ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
3780 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3781 ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
3782 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
3783 ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
3784 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
3785 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
3786 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
3787 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
3788 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
3789 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
3790 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3791 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
3792 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
3793 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
3794 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3795 ((options & PCRE2_UCP) != 0)? " ucp" : "",
3796 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
3797 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
3798 ((options & PCRE2_UTF) != 0)? " utf" : "",
3799 after);
3800 }
3801
3802
3803
3804 #ifdef SUPPORT_PCRE2_8
3805 /*************************************************
3806 * Show match options *
3807 *************************************************/
3808
3809 /* Called for unsupported POSIX options. */
3810
3811 static void
3812 show_match_options(uint32_t options)
3813 {
3814 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s",
3815 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
3816 ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
3817 ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
3818 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
3819 ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
3820 ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
3821 ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
3822 ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
3823 ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
3824 ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
3825 }
3826 #endif /* SUPPORT_PCRE2_8 */
3827
3828
3829
3830 /*************************************************
3831 * Show memory usage info for a pattern *
3832 *************************************************/
3833
3834 static void
3835 show_memory_info(void)
3836 {
3837 uint32_t name_count, name_entry_size;
3838 size_t size, cblock_size;
3839
3840 /* One of the test_mode values will always be true, but to stop a compiler
3841 warning we must initialize cblock_size. */
3842
3843 cblock_size = 0;
3844 #ifdef SUPPORT_PCRE2_8
3845 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
3846 #endif
3847 #ifdef SUPPORT_PCRE2_16
3848 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
3849 #endif
3850 #ifdef SUPPORT_PCRE2_32
3851 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
3852 #endif
3853
3854 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
3855 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
3856 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
3857 fprintf(outfile, "Memory allocation (code space): %d\n",
3858 (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
3859 if (pat_patctl.jit != 0)
3860 {
3861 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
3862 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
3863 }
3864 }
3865
3866
3867
3868 /*************************************************
3869 * Show frame size info for a pattern *
3870 *************************************************/
3871
3872 static void
3873 show_framesize(void)
3874 {
3875 size_t frame_size;
3876 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
3877 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
3878 }
3879
3880
3881
3882 /*************************************************
3883 * Callback function for callout enumeration *
3884 *************************************************/
3885
3886 /* The only differences in the callout emumeration block for different code
3887 unit widths are that the pointers to the subject, the most recent MARK, and a
3888 callout argument string point to strings of the appropriate width. Casts can be
3889 used to deal with this.
3890
3891 Argument:
3892 cb pointer to enumerate block
3893 callout_data user data
3894
3895 Returns: 0
3896 */
3897
3898 static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
3899 void *callout_data)
3900 {
3901 uint32_t i;
3902 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
3903
3904 (void)callout_data; /* Not currently displayed */
3905
3906 fprintf(outfile, "Callout ");
3907 if (cb->callout_string != NULL)
3908 {
3909 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
3910 fprintf(outfile, "%c", delimiter);
3911 PCHARSV(cb->callout_string, 0,
3912 cb->callout_string_length, utf, outfile);
3913 for (i = 0; callout_start_delims[i] != 0; i++)
3914 if (delimiter == callout_start_delims[i])
3915 {
3916 delimiter = callout_end_delims[i];
3917 break;
3918 }
3919 fprintf(outfile, "%c ", delimiter);
3920 }
3921 else fprintf(outfile, "%d ", cb->callout_number);
3922
3923 fprintf(outfile, "%.*s\n",
3924 (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
3925 pbuffer8 + cb->pattern_position);
3926
3927 return 0;
3928 }
3929
3930
3931
3932 /*************************************************
3933 * Show information about a pattern *
3934 *************************************************/
3935
3936 /* This function is called after a pattern has been compiled if any of the
3937 information-requesting controls have been set.
3938
3939 Arguments: none
3940
3941 Returns: PR_OK continue processing next line
3942 PR_SKIP skip to a blank line
3943 PR_ABEND abort the pcre2test run
3944 */
3945
3946 static int
3947 show_pattern_info(void)
3948 {
3949 uint32_t compile_options, overall_options;
3950
3951 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
3952 {
3953 fprintf(outfile, "------------------------------------------------------------------\n");
3954 PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
3955 }
3956
3957 if ((pat_patctl.control & CTL_INFO) != 0)
3958 {
3959 void *nametable;
3960 uint8_t *start_bits;
3961 BOOL match_limit_set, depth_limit_set;
3962 uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
3963 hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
3964 match_limit, minlength, nameentrysize, namecount, newline_convention,
3965 depth_limit;
3966
3967 /* These info requests may return PCRE2_ERROR_UNSET. */
3968
3969 switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
3970 {
3971 case 0:
3972 match_limit_set = TRUE;
3973 break;
3974
3975 case PCRE2_ERROR_UNSET:
3976 match_limit_set = FALSE;
3977 break;
3978
3979 default:
3980 return PR_ABEND;
3981 }
3982
3983 switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
3984 {
3985 case 0:
3986 depth_limit_set = TRUE;
3987 break;
3988
3989 case PCRE2_ERROR_UNSET:
3990 depth_limit_set = FALSE;
3991 break;
3992
3993 default:
3994 return PR_ABEND;
3995 }
3996
3997 /* These info requests should always succeed. */
3998
3999 if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
4000 pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
4001 pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
4002 pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
4003 pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
4004 pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
4005 pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
4006 pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
4007 pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
4008 pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
4009 pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
4010 pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
4011 pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
4012 pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
4013 pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
4014 pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
4015 pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
4016 != 0)
4017 return PR_ABEND;
4018
4019 fprintf(outfile, "Capturing subpattern count = %d\n", capture_count);
4020
4021 if (backrefmax > 0)
4022 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4023
4024 if (maxlookbehind > 0)
4025 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4026
4027 if (match_limit_set)
4028 fprintf(outfile, "Match limit = %u\n", match_limit);
4029
4030 if (depth_limit_set)
4031 fprintf(outfile, "Depth limit = %u\n", depth_limit);
4032
4033 if (namecount > 0)
4034 {
4035 fprintf(outfile, "Named capturing subpatterns:\n");
4036 for (; namecount > 0; namecount--)
4037 {
4038 int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
4039 uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
4040 fprintf(outfile, " ");
4041 PCHARSV(nametable, imm2_size, length, FALSE, outfile);
4042 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4043 #ifdef SUPPORT_PCRE2_32
4044 if (test_mode == PCRE32_MODE)
4045 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
4046 #endif
4047 #ifdef SUPPORT_PCRE2_16
4048 if (test_mode == PCRE16_MODE)
4049 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
4050 #endif
4051 #ifdef SUPPORT_PCRE2_8
4052 if (test_mode == PCRE8_MODE)
4053 fprintf(outfile, "%3d\n", (int)(
4054 ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
4055 #endif
4056 nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
4057 }
4058 }
4059
4060 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4061 if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
4062 if (match_empty) fprintf(outfile, "May match empty string\n");
4063
4064 pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
4065 pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
4066
4067 /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
4068 cluttering up the verification output of non-UTF test files. */
4069
4070 if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
4071 {
4072 compile_options &= ~PCRE2_NEVER_UTF;
4073 overall_options &= ~PCRE2_NEVER_UTF;
4074 }
4075
4076 if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
4077 {
4078 compile_options &= ~PCRE2_NEVER_UCP;
4079 overall_options &= ~PCRE2_NEVER_UCP;
4080 }
4081
4082 if ((compile_options|overall_options) != 0)
4083 {
4084 if (compile_options == overall_options)
4085 show_compile_options(compile_options, "Options:", "\n");
4086 else
4087 {
4088 show_compile_options(compile_options, "Compile options:", "\n");
4089 show_compile_options(overall_options, "Overall options:", "\n");
4090 }
4091 }
4092
4093 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4094
4095 if ((pat_patctl.control2 & CTL_BSR_SET) != 0 ||
4096 (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4097 fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4098 "any Unicode newline" : "CR, LF, or CRLF");
4099
4100 if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4101 {
4102 switch (newline_convention)
4103 {
4104 case PCRE2_NEWLINE_CR:
4105 fprintf(outfile, "Forced newline is CR\n");
4106 break;
4107
4108 case PCRE2_NEWLINE_LF:
4109 fprintf(outfile, "Forced newline is LF\n");
4110 break;
4111
4112 case PCRE2_NEWLINE_CRLF:
4113 fprintf(outfile, "Forced newline is CRLF\n");
4114 break;
4115
4116 case PCRE2_NEWLINE_ANYCRLF:
4117 fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4118 break;
4119
4120 case PCRE2_NEWLINE_ANY:
4121 fprintf(outfile, "Forced newline is any Unicode newline\n");
4122 break;
4123
4124 default:
4125 break;
4126 }
4127 }
4128
4129 if (first_ctype == 2)
4130 {
4131 fprintf(outfile, "First code unit at start or follows newline\n");
4132 }
4133 else if (first_ctype == 1)
4134 {
4135 const char *caseless =
4136 ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4137 "" : " (caseless)";
4138 if (PRINTOK(first_cunit))
4139 fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4140 else
4141 {
4142 fprintf(outfile, "First code unit = ");
4143 pchar(first_cunit, FALSE, outfile);
4144 fprintf(outfile, "%s\n", caseless);
4145 }
4146 }
4147 else if (start_bits != NULL)
4148 {
4149 int i;
4150 int c = 24;
4151 fprintf(outfile, "Starting code units: ");
4152 for (i = 0; i < 256; i++)
4153 {
4154 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4155 {
4156 if (c > 75)
4157 {
4158 fprintf(outfile, "\n ");
4159 c = 2;
4160 }
4161 if (PRINTOK(i) && i != ' ')
4162 {
4163 fprintf(outfile, "%c ", i);
4164 c += 2;
4165 }
4166 else
4167 {
4168 fprintf(outfile, "\\x%02x ", i);
4169 c += 5;
4170 }
4171 }
4172 }
4173 fprintf(outfile, "\n");
4174 }
4175
4176 if (last_ctype != 0)
4177 {
4178 const char *caseless =
4179 ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4180 "" : " (caseless)";
4181 if (PRINTOK(last_cunit))
4182 fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4183 else
4184 {
4185 fprintf(outfile, "Last code unit = ");
4186 pchar(last_cunit, FALSE, outfile);
4187 fprintf(outfile, "%s\n", caseless);
4188 }
4189 }
4190
4191 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4192
4193 if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4194 {
4195 if (FLD(compiled_code, executable_jit) != NULL)
4196 fprintf(outfile, "JIT compilation was successful\n");
4197 else
4198 {
4199 #ifdef SUPPORT_JIT
4200 int len;
4201 fprintf(outfile, "JIT compilation was not successful");
4202 if (jitrc != 0)
4203 {
4204 fprintf(outfile, " (");
4205 PCRE2_GET_ERROR_MESSAGE(len, jitrc, pbuffer);
4206 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4207 fprintf(outfile, ")");
4208 }
4209 fprintf(outfile, "\n");
4210 #else
4211 fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4212 #endif
4213 }
4214 }
4215 }
4216
4217 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4218 {
4219 int errorcode;
4220 PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4221 if (errorcode != 0)
4222 {
4223 int len;
4224 fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4225 if (errorcode < 0)
4226 {
4227 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4228 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4229 }
4230 fprintf(outfile, "\n");
4231 return PR_SKIP;
4232 }
4233 }
4234
4235 return PR_OK;
4236 }
4237
4238
4239
4240 /*************************************************
4241 * Handle serialization error *
4242 *************************************************/
4243
4244 /* Print an error message after a serialization failure.
4245
4246 Arguments:
4247 rc the error code
4248 msg an initial message for what failed
4249
4250 Returns: nothing
4251 */
4252
4253 static void
4254 serial_error(int rc, const char *msg)
4255 {
4256 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4257 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
4258 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
4259 fprintf(outfile, "\n");
4260 }
4261
4262
4263
4264 /*************************************************
4265 * Open file for save/load commands *
4266 *************************************************/
4267
4268 /* This function decodes the file name and opens the file.
4269
4270 Arguments:
4271 buffptr point after the #command
4272 mode open mode
4273 fptr points to the FILE variable
4274
4275 Returns: PR_OK or PR_ABEND
4276 */
4277
4278 static int
4279 open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
4280 {
4281 char *endf;
4282 char *filename = (char *)buffptr;
4283 while (isspace(*filename)) filename++;
4284 endf = filename + strlen8(filename);
4285 while (endf > filename && isspace(endf[-1])) endf--;
4286
4287 if (endf == filename)
4288 {
4289 fprintf(outfile, "** File name expected after #save\n");
4290 return PR_ABEND;
4291 }
4292
4293 *endf = 0;
4294 *fptr = fopen((const char *)filename, mode);
4295 if (*fptr == NULL)
4296 {
4297 fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4298 return PR_ABEND;
4299 }
4300
4301 return PR_OK;
4302 }
4303
4304
4305
4306 /*************************************************
4307 * Process command line *
4308 *************************************************/
4309
4310 /* This function is called for lines beginning with # and a character that is
4311 not ! or whitespace, when encountered between tests, which means that there is
4312 no compiled pattern (compiled_code is NULL). The line is in buffer.
4313
4314 Arguments: none
4315
4316 Returns: PR_OK continue processing next line
4317 PR_SKIP skip to a blank line
4318 PR_ABEND abort the pcre2test run
4319 */
4320
4321 static int
4322 process_command(void)
4323 {
4324 FILE *f;
4325 PCRE2_SIZE serial_size;
4326 size_t i;
4327 int rc, cmd, cmdlen;
4328 uint16_t first_listed_newline;
4329 const char *cmdname;
4330 uint8_t *argptr, *serial;
4331
4332 if (restrict_for_perl_test)
4333 {
4334 fprintf(outfile, "** #-commands are not allowed after #perltest\n");
4335 return PR_ABEND;
4336 }
4337
4338 cmd = CMD_UNKNOWN;
4339 cmdlen = 0;
4340
4341 for (i = 0; i < cmdlistcount; i++)
4342 {
4343 cmdname = cmdlist[i].name;
4344 cmdlen = strlen(cmdname);
4345 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4346 isspace(buffer[cmdlen+1]))
4347 {
4348 cmd = cmdlist[i].value;
4349 break;
4350 }
4351 }
4352
4353 argptr = buffer + cmdlen + 1;
4354
4355 switch(cmd)
4356 {
4357 case CMD_UNKNOWN:
4358 fprintf(outfile, "** Unknown command: %s", buffer);
4359 break;
4360
4361 case CMD_FORBID_UTF:
4362 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4363 break;
4364
4365 case CMD_PERLTEST:
4366 restrict_for_perl_test = TRUE;
4367 break;
4368
4369 /* Set default pattern modifiers */
4370
4371 case CMD_PATTERN:
4372 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4373 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4374 def_patctl.jit = 7;
4375 break;
4376
4377 /* Set default subject modifiers */
4378
4379 case CMD_SUBJECT:
4380 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4381 break;
4382
4383 /* Check the default newline, and if not one of those listed, set up the
4384 first one to be forced. An empty list unsets. */
4385
4386 case CMD_NEWLINE_DEFAULT:
4387 local_newline_default = 0; /* Unset */
4388 first_listed_newline = 0;
4389 for (;;)
4390 {
4391 while (isspace(*argptr)) argptr++;
4392 if (*argptr == 0) break;
4393 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4394 {
4395 size_t nlen = strlen(newlines[i]);
4396 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4397 isspace(argptr[nlen]))
4398 {
4399 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
4400 if (first_listed_newline == 0) first_listed_newline = i;
4401 }
4402 }
4403 while (*argptr != 0 && !isspace(*argptr)) argptr++;
4404 }
4405 local_newline_default = first_listed_newline;
4406 break;
4407
4408 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4409 the compiled pattern (e.g. to give information) are permitted. The default
4410 pattern modifiers are ignored. */
4411
4412 case CMD_POP:
4413 case CMD_POPCOPY:
4414 if (patstacknext <= 0)
4415 {
4416 fprintf(outfile, "** Can't pop off an empty stack\n");
4417 return PR_SKIP;
4418 }
4419 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */
4420 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4421 return PR_SKIP;
4422
4423 if (cmd == CMD_POP)
4424 {
4425 SET(compiled_code, patstack[--patstacknext]);
4426 }
4427 else
4428 {
4429 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4430 }
4431
4432 if (pat_patctl.jit != 0)
4433 {
4434 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4435 }
4436 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4437 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4438 if ((pat_patctl.control & CTL_ANYINFO) != 0)
4439 {
4440 rc = show_pattern_info();
4441 if (rc != PR_OK) return rc;
4442 }
4443 break;
4444
4445 /* Save the stack of compiled patterns to a file, then empty the stack. */
4446
4447 case CMD_SAVE:
4448 if (patstacknext <= 0)
4449 {
4450 fprintf(outfile, "** No stacked patterns to save\n");
4451 return PR_OK;
4452 }
4453
4454 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
4455 if (rc != PR_OK) return rc;
4456
4457 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4458 general_context);
4459 if (rc < 0)
4460 {
4461 serial_error(rc, "Serialization");
4462 fclose(f);
4463 break;
4464 }
4465
4466 /* Write the length at the start of the file to make it straightforward to
4467 get the right memory when re-loading. This saves having to read the file size
4468 in different operating systems. To allow for different endianness (even
4469 though reloading with the opposite endianness does not work), write the
4470 length byte-by-byte. */
4471
4472 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
4473 if (fwrite(serial, 1, serial_size, f) != serial_size)
4474 {
4475 fprintf(outfile, "** Wrong return from fwrite()\n");
4476 fclose(f);
4477 return PR_ABEND;
4478 }
4479
4480 fclose(f);
4481 PCRE2_SERIALIZE_FREE(serial);
4482 while(patstacknext > 0)
4483 {
4484 SET(compiled_code, patstack[--patstacknext]);
4485 SUB1(pcre2_code_free, compiled_code);
4486 }
4487 SET(compiled_code, NULL);
4488 break;
4489
4490 /* Load a set of compiled patterns from a file onto the stack */
4491
4492 case CMD_LOAD:
4493 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
4494 if (rc != PR_OK) return rc;
4495
4496 serial_size = 0;
4497 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
4498
4499 serial = malloc(serial_size);
4500 if (serial == NULL)
4501 {
4502 fprintf(outfile, "** Failed to get memory (size %lu) for #load\n",
4503 (unsigned long int)serial_size);
4504 fclose(f);
4505 return PR_ABEND;
4506 }
4507
4508 if (fread(serial, 1, serial_size, f) != serial_size)
4509 {
4510 fprintf(outfile, "** Wrong return from fread()\n");
4511 free(serial);
4512 fclose(f);
4513 return PR_ABEND;
4514 }
4515 fclose(f);
4516
4517 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
4518 if (rc < 0) serial_error(rc, "Get number of codes"); else
4519 {
4520 if (rc + patstacknext > PATSTACKSIZE)
4521 {
4522 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
4523 rc, (rc == 1)? "" : "s");
4524 rc = PATSTACKSIZE - patstacknext;
4525 fprintf(outfile, "** Decoding %d pattern%s\n", rc,
4526 (rc == 1)? "" : "s");
4527 }
4528 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
4529 general_context);
4530 if (rc < 0) serial_error(rc, "Deserialization");
4531 else patstacknext += rc;
4532 }
4533
4534 free(serial);
4535 break;
4536 }
4537
4538 return PR_OK;
4539 }
4540
4541
4542
4543 /*************************************************
4544 * Process pattern line *
4545 *************************************************/
4546
4547 /* This function is called when the input buffer contains the start of a
4548 pattern. The first character is known to be a valid delimiter. The pattern is
4549 read, modifiers are interpreted, and a suitable local context is set up for
4550 this test. The pattern is then compiled.
4551
4552 Arguments: none
4553
4554 Returns: PR_OK continue processing next line
4555 PR_SKIP skip to a blank line
4556 PR_ABEND abort the pcre2test run
4557 */
4558
4559 static int
4560 process_pattern(void)
4561 {
4562 BOOL utf;
4563 uint32_t k;
4564 uint8_t *p = buffer;
4565 unsigned int delimiter = *p++;
4566 int errorcode;
4567 void *use_pat_context;
4568 PCRE2_SIZE patlen;
4569 PCRE2_SIZE valgrind_access_length;
4570 PCRE2_SIZE erroroffset;
4571
4572 /* Initialize the context and pattern/data controls for this test from the
4573 defaults. */
4574
4575 PATCTXCPY(pat_context, default_pat_context);
4576 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
4577
4578 /* Find the end of the pattern, reading more lines if necessary. */
4579
4580 for(;;)
4581 {
4582 while (*p != 0)
4583 {
4584 if (*p == '\\' && p[1] != 0) p++;
4585 else if (*p == delimiter) break;
4586 p++;
4587 }
4588 if (*p != 0) break;
4589 if ((p = extend_inputline(infile, p, " > ")) == NULL)
4590 {
4591 fprintf(outfile, "** Unexpected EOF\n");
4592 return PR_ABEND;
4593 }
4594 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
4595 }
4596
4597 /* If the first character after the delimiter is backslash, make the pattern
4598 end with backslash. This is purely to provide a way of testing for the error
4599 message when a pattern ends with backslash. */
4600
4601 if (p[1] == '\\') *p++ = '\\';
4602
4603 /* Terminate the pattern at the delimiter, and compute the length. */
4604
4605 *p++ = 0;
4606 patlen = p - buffer - 2;
4607
4608 /* Look for modifiers and options after the final delimiter. */
4609
4610 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
4611 utf = (pat_patctl.options & PCRE2_UTF) != 0;
4612
4613 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
4614 exclusive with the utf modifier. */
4615
4616 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
4617 {
4618 if (test_mode == PCRE8_MODE)
4619 {
4620 fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
4621 return PR_SKIP;
4622 }
4623 if (utf)
4624 {
4625 fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
4626 return PR_SKIP;
4627 }
4628 }
4629
4630 /* Check for mutually exclusive modifiers. At present, these are all in the
4631 first control word. */
4632
4633 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
4634 {
4635 uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
4636 if (c != 0 && c != (c & (~c+1)))
4637 {
4638 show_controls(c, 0, "** Not allowed together:");
4639 fprintf(outfile, "\n");
4640 return PR_SKIP;
4641 }
4642 }
4643
4644 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
4645 specified. */
4646
4647 if (pat_patctl.jit == 0 &&
4648 (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
4649 pat_patctl.jit = 7;
4650
4651 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
4652 in callouts. Convert from hex if requested (literal strings in quotes may be
4653 present within the hexadecimal pairs). The result must necessarily be fewer
4654 characters so will always fit in pbuffer8. */
4655
4656 if ((pat_patctl.control & CTL_HEXPAT) != 0)
4657 {
4658 uint8_t *pp, *pt;
4659 uint32_t c, d;
4660
4661 pt = pbuffer8;
4662 for (pp = buffer + 1; *pp != 0; pp++)
4663 {
4664 if (isspace(*pp)) continue;
4665 c = *pp++;
4666
4667 /* Handle a literal substring */
4668
4669 if (c == '\'' || c == '"')
4670 {
4671 uint8_t *pq = pp;
4672 for (;; pp++)
4673 {
4674 d = *pp;
4675 if (d == 0)
4676 {
4677 fprintf(outfile, "** Missing closing quote in hex pattern: "
4678 "opening quote is at offset " PTR_SPEC ".\n", pq - buffer - 2);
4679 return PR_SKIP;
4680 }
4681 if (d == c) break;
4682 *pt++ = d;
4683 }
4684 }
4685
4686 /* Expect a hex pair */
4687
4688 else
4689 {
4690 if (!isxdigit(c))
4691 {
4692 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset "
4693 PTR_SPEC " in hex pattern: quote missing?\n", c, pp - buffer - 2);
4694 return PR_SKIP;
4695 }
4696 if (*pp == 0)
4697 {
4698 fprintf(outfile, "** Odd number of digits in hex pattern\n");
4699 return PR_SKIP;
4700 }
4701 d = *pp;
4702 if (!isxdigit(d))
4703 {
4704 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset "
4705 PTR_SPEC " in hex pattern: quote missing?\n", d, pp - buffer - 1);
4706 return PR_SKIP;
4707 }
4708 c = toupper(c);
4709 d = toupper(d);
4710 *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
4711 (isdigit(d)? (d - '0') : (d - 'A' + 10));
4712 }
4713 }
4714 *pt = 0;
4715 patlen = pt - pbuffer8;
4716 }
4717
4718 /* If not a hex string, process for repetition expansion if requested. */
4719
4720 else if ((pat_patctl.control & CTL_EXPAND) != 0)
4721 {
4722 uint8_t *pp, *pt;
4723
4724 pt = pbuffer8;
4725 for (pp = buffer + 1; *pp != 0; pp++)
4726 {
4727 uint8_t *pc = pp;
4728 uint32_t count = 1;
4729 size_t length = 1;
4730
4731 /* Check for replication syntax; if not found, the defaults just set will
4732 prevail and one character will be copied. */
4733
4734 if (pp[0] == '\\' && pp[1] == '[')
4735 {
4736 uint8_t *pe;
4737 for (pe = pp + 2; *pe != 0; pe++)
4738 {
4739 if (pe[0] == ']' && pe[1] == '{')
4740 {
4741 uint32_t clen = pe - pc - 2;
4742 uint32_t i = 0;
4743 unsigned long uli;
4744 char *endptr;
4745
4746 pe += 2;
4747 uli = strtoul((const char *)pe, &endptr, 10);
4748 if (U32OVERFLOW(uli))
4749 {
4750 fprintf(outfile, "** Pattern repeat count too large\n");
4751 return PR_SKIP;
4752 }
4753
4754 i = (uint32_t)uli;
4755 pe = (uint8_t *)endptr;
4756 if (*pe == '}')
4757 {
4758 if (i == 0)
4759 {
4760 fprintf(outfile, "** Zero repeat not allowed\n");
4761 return PR_SKIP;
4762 }
4763 pc += 2;
4764 count = i;
4765 length = clen;
4766 pp = pe;
4767 break;
4768 }
4769 }
4770 }
4771 }
4772
4773 /* Add to output. If the buffer is too small expand it. The function for
4774 expanding buffers always keeps buffer and pbuffer8 in step as far as their
4775 size goes. */
4776
4777 while (pt + count * length > pbuffer8 + pbuffer8_size)
4778 {
4779 size_t pc_offset = pc - buffer;
4780 size_t pp_offset = pp - buffer;
4781 size_t pt_offset = pt - pbuffer8;
4782 expand_input_buffers();
4783 pc = buffer + pc_offset;
4784 pp = buffer + pp_offset;
4785 pt = pbuffer8 + pt_offset;
4786 }
4787
4788 for (; count > 0; count--)
4789 {
4790 memcpy(pt, pc, length);
4791 pt += length;
4792 }
4793 }
4794
4795 *pt = 0;
4796 patlen = pt - pbuffer8;
4797
4798 if ((pat_patctl.control & CTL_INFO) != 0)
4799 fprintf(outfile, "Expanded: %s\n", pbuffer8);
4800 }
4801
4802 /* Neither hex nor expanded, just copy the input verbatim. */
4803
4804 else
4805 {
4806 strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
4807 }
4808
4809 /* Sort out character tables */
4810
4811 if (pat_patctl.locale[0] != 0)
4812 {
4813 if (pat_patctl.tables_id != 0)
4814 {
4815 fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
4816 return PR_SKIP;
4817 }
4818 if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
4819 {
4820 fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
4821 return PR_SKIP;
4822 }
4823 if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
4824 {
4825 strcpy((char *)locale_name, (char *)pat_patctl.locale);
4826 if (locale_tables != NULL) free((void *)locale_tables);
4827 PCRE2_MAKETABLES(locale_tables);
4828 }
4829 use_tables = locale_tables;
4830 }
4831
4832 else switch (pat_patctl.tables_id)
4833 {
4834 case 0: use_tables = NULL; break;
4835 case 1: use_tables = tables1; break;
4836 case 2: use_tables = tables2; break;
4837 default:
4838 fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
4839 return PR_SKIP;
4840 }
4841
4842 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
4843
4844 /* Set up for the stackguard test. */
4845
4846 if (pat_patctl.stackguard_test != 0)
4847 {
4848 PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
4849 }
4850
4851 /* Handle compiling via the POSIX interface, which doesn't support the
4852 timing, showing, or debugging options, nor the ability to pass over
4853 local character tables. Neither does it have 16-bit or 32-bit support. */
4854
4855 if ((pat_patctl.control & CTL_POSIX) != 0)
4856 {
4857 #ifdef SUPPORT_PCRE2_8
4858 int rc;
4859 int cflags = 0;
4860 const char *msg = "** Ignored with POSIX interface:";
4861 #endif
4862
4863 if (test_mode != PCRE8_MODE)
4864 {
4865 fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
4866 return PR_SKIP;
4867 }
4868
4869 #ifdef SUPPORT_PCRE2_8
4870 /* Check for features that the POSIX interface does not support. */
4871
4872 if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
4873 if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
4874 if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
4875 if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
4876 if (timeit > 0) prmsg(&msg, "timing");
4877 if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
4878
4879 if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
4880 {
4881 show_compile_options(
4882 pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
4883 msg = "";
4884 }
4885 if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
4886 (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
4887 {
4888 show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
4889 pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
4890 msg = "";
4891 }
4892
4893 if (local_newline_default != 0) prmsg(&msg, "#newline_default");
4894
4895 if (msg[0] == 0) fprintf(outfile, "\n");
4896
4897 /* Translate PCRE2 options to POSIX options and then compile. */
4898
4899 if (utf) cflags |= REG_UTF;
4900 if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
4901 if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
4902 if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
4903 if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
4904 if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
4905 if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
4906
4907 rc = regcomp(&preg, (char *)pbuffer8, cflags);
4908
4909 /* Compiling failed */
4910
4911 if (rc != 0)
4912 {
4913 size_t bsize, usize;
4914 int psize;
4915
4916 preg.re_pcre2_code = NULL; /* In case something was left in there */
4917 preg.re_match_data = NULL;
4918
4919 bsize = (pat_patctl.regerror_buffsize != 0)?
4920 pat_patctl.regerror_buffsize : pbuffer8_size;
4921 if (bsize + 8 < pbuffer8_size)
4922 memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
4923 usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
4924
4925 /* Inside regerror(), snprintf() is used. If the buffer is too small, some
4926 versions of snprintf() put a zero byte at the end, but others do not.
4927 Therefore, we print a maximum of one less than the size of the buffer. */
4928
4929 psize = (int)bsize - 1;
4930 fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
4931 if (usize > bsize)
4932 {
4933 fprintf(outfile, "** regerror() message truncated\n");
4934 if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
4935 fprintf(outfile, "** regerror() buffer overflow\n");
4936 }
4937 return PR_SKIP;
4938 }
4939
4940 /* Compiling succeeded. Check that the values in the preg block are sensible.
4941 It can happen that pcre2test is accidentally linked with a different POSIX
4942 library which succeeds, but of course puts different things into preg. In
4943 this situation, calling regfree() may cause a segfault (or invalid free() in
4944 valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
4945 calling of regfree() on exit. */
4946
4947 if (preg.re_pcre2_code == NULL ||
4948 ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
4949 ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
4950 preg.re_match_data == NULL ||
4951 preg.re_cflags != cflags)
4952 {
4953 fprintf(outfile,
4954 "** The regcomp() function returned zero (success), but the values set\n"
4955 "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
4956 "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
4957 "** some other POSIX regex library.\n**\n");
4958 preg.re_pcre2_code = NULL;
4959 return PR_ABEND;
4960 }
4961
4962 return PR_OK;
4963 #endif /* SUPPORT_PCRE2_8 */
4964 }
4965
4966 /* Handle compiling via the native interface. Controls that act later are
4967 ignored with "push". Replacements are locked out. */
4968
4969 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
4970 {
4971 if (pat_patctl.replacement[0] != 0)
4972 {
4973 fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
4974 return PR_OK;
4975 }
4976 if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
4977 (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
4978 {
4979 show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
4980 pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
4981 "** Ignored when compiled pattern is stacked with 'push':");
4982 fprintf(outfile, "\n");
4983 }
4984 if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
4985 (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
4986 {
4987 show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
4988 pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
4989 "** Applies only to compile when pattern is stacked with 'push':");
4990 fprintf(outfile, "\n");
4991 }
4992 }
4993
4994 /* Convert the input in non-8-bit modes. */
4995
4996 errorcode = 0;
4997
4998 #ifdef SUPPORT_PCRE2_16
4999 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5000 #endif
5001
5002 #ifdef SUPPORT_PCRE2_32
5003 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5004 #endif
5005
5006 switch(errorcode)
5007 {
5008 case -1:
5009 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5010 "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5011 return PR_SKIP;
5012
5013 case -2:
5014 fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5015 "cannot be converted to UTF\n");
5016 return PR_SKIP;
5017
5018 case -3:
5019 fprintf(outfile, "** Failed: character value greater than 0xffff "
5020 "cannot be converted to 16-bit in non-UTF mode\n");
5021 return PR_SKIP;
5022
5023 default:
5024 break;
5025 }
5026
5027 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5028 patlen. By default we pass a zero-terminated pattern, but a length is passed if
5029 "use_length" was specified or this is a hex pattern (which might contain binary
5030 zeros). When valgrind is supported, arrange for the unused part of the buffer
5031 to be marked as no access. */
5032
5033 valgrind_access_length = patlen;
5034 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5035 {
5036 patlen = PCRE2_ZERO_TERMINATED;
5037 valgrind_access_length += 1; /* For the terminating zero */
5038 }
5039
5040 #ifdef SUPPORT_VALGRIND
5041 #ifdef SUPPORT_PCRE2_8
5042 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5043 {
5044 VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5045 pbuffer8_size - valgrind_access_length);
5046 }
5047 #endif
5048 #ifdef SUPPORT_PCRE2_16
5049 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5050 {
5051 VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5052 pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5053 }
5054 #endif
5055 #ifdef SUPPORT_PCRE2_32
5056 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5057 {
5058 VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5059 pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5060 }
5061 #endif
5062 #else /* Valgrind not supported */
5063 (void)valgrind_access_length; /* Avoid compiler warning */
5064 #endif
5065
5066 /* If #newline_default has been used and the library was not compiled with an
5067 appropriate default newline setting, local_newline_default will be non-zero. We
5068 use this if there is no explicit newline modifier. */
5069
5070 if ((pat_patctl.control2 & CTL_NL_SET) == 0 && local_newline_default != 0)
5071 {
5072 SETFLD(pat_context, newline_convention, local_newline_default);
5073 }
5074
5075 /* The null_context modifier is used to test calling pcre2_compile() with a
5076 NULL context. */
5077
5078 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5079 NULL : PTR(pat_context);
5080
5081 /* Compile many times when timing. */
5082
5083 if (timeit > 0)
5084 {
5085 int i;
5086 clock_t time_taken = 0;
5087 for (i = 0; i < timeit; i++)
5088 {
5089 clock_t start_time = clock();
5090 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5091 pat_patctl.options|forbid_utf, &errorcode, &erroroffset, use_pat_context);
5092 time_taken += clock() - start_time;
5093 if (TEST(compiled_code, !=, NULL))
5094 { SUB1(pcre2_code_free, compiled_code); }
5095 }
5096 total_compile_time += time_taken;
5097 fprintf(outfile, "Compile time %.4f milliseconds\n",
5098 (((double)time_taken * 1000.0) / (double)timeit) /
5099 (double)CLOCKS_PER_SEC);
5100 }
5101
5102 /* A final compile that is used "for real". */
5103
5104 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf,
5105 &errorcode, &erroroffset, use_pat_context);
5106
5107 /* Call the JIT compiler if requested. When timing, we must free and recompile
5108 the pattern each time because that is the only way to free the JIT compiled
5109 code. We know that compilation will always succeed. */
5110
5111 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5112 {
5113 if (timeit > 0)
5114 {
5115 int i;
5116 clock_t time_taken = 0;
5117 for (i = 0; i < timeit; i++)
5118 {
5119 clock_t start_time;
5120 SUB1(pcre2_code_free, compiled_code);
5121 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5122 pat_patctl.options|forbid_utf, &errorcode, &erroroffset,
5123 use_pat_context);
5124 start_time = clock();
5125 PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit);
5126 time_taken += clock() - start_time;
5127 }
5128 total_jit_compile_time += time_taken;
5129 fprintf(outfile, "JIT compile %.4f milliseconds\n",
5130 (((double)time_taken * 1000.0) / (double)timeit) /
5131 (double)CLOCKS_PER_SEC);
5132 }
5133 else
5134 {
5135 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5136 }
5137 }
5138
5139 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5140 and 32-bit buffers can be marked completely undefined, but we must leave the
5141 pattern in the 8-bit buffer defined because it may be read from a callout
5142 during matching. */
5143
5144 #ifdef SUPPORT_VALGRIND
5145 #ifdef SUPPORT_PCRE2_8
5146 if (test_mode == PCRE8_MODE)
5147 {
5148 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5149 pbuffer8_size - valgrind_access_length);
5150 }
5151 #endif
5152 #ifdef SUPPORT_PCRE2_16
5153 if (test_mode == PCRE16_MODE)
5154 {
5155 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5156 }
5157 #endif
5158 #ifdef SUPPORT_PCRE2_32
5159 if (test_mode == PCRE32_MODE)
5160 {
5161 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5162 }
5163 #endif
5164 #endif
5165
5166 /* Compilation failed; go back for another re, skipping to blank line
5167 if non-interactive. */
5168
5169 if (TEST(compiled_code, ==, NULL))
5170 {
5171 int len;
5172 fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5173 (int)erroroffset);
5174 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
5175 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
5176 fprintf(outfile, "\n");
5177 return PR_SKIP;
5178 }
5179
5180 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5181 locked out at compile time, but we must also check for occurrences of \P, \p,
5182 and \X, which are only supported when Unicode is supported. */
5183
5184 if (forbid_utf != 0)
5185 {
5186 if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5187 {
5188 fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5189 "#forbid_utf command\n");
5190 return PR_SKIP;
5191 }
5192 }
5193
5194 /* Remember the maximum lookbehind, for partial matching. */
5195
5196 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5197 return PR_ABEND;
5198
5199 /* If an explicit newline modifier was given, set the information flag in the
5200 pattern so that it is preserved over push/pop. */
5201
5202 if ((pat_patctl.control2 & CTL_NL_SET) != 0)
5203 {
5204 SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5205 }
5206
5207 /* Output code size and other information if requested. */
5208
5209 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5210 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
5211 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5212 {
5213 int rc = show_pattern_info();
5214 if (rc != PR_OK) return rc;
5215 }
5216
5217 /* The "push" control requests that the compiled pattern be remembered on a
5218 stack. This is mainly for testing the serialization functionality. */
5219
5220 if ((pat_patctl.control & CTL_PUSH) != 0)
5221 {
5222 if (patstacknext >= PATSTACKSIZE)
5223 {
5224 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5225 return PR_ABEND;
5226 }
5227 patstack[patstacknext++] = PTR(compiled_code);
5228 SET(compiled_code, NULL);
5229 }
5230
5231 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5232 copy of the pattern, the latter with a copy of its character tables. This tests
5233 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5234
5235 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5236 {
5237 if (patstacknext >= PATSTACKSIZE)
5238 {
5239 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5240 return PR_ABEND;
5241 }
5242 if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5243 {
5244 PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5245 }
5246 else
5247 {
5248 PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
5249 compiled_code); }
5250 }
5251
5252 return PR_OK;
5253 }
5254
5255
5256
5257 /*************************************************
5258 * Check match or depth limit *
5259 *************************************************/
5260
5261 static int
5262 check_match_limit(uint8_t *pp, size_t ulen, int errnumber, const char *msg)
5263 {
5264 int capcount;
5265 uint32_t min = 0;
5266 uint32_t mid = 64;
5267 uint32_t max = UINT32_MAX;
5268
5269 PCRE2_SET_MATCH_LIMIT(dat_context, max);
5270 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
5271
5272 for (;;)
5273 {
5274 if (errnumber == PCRE2_ERROR_MATCHLIMIT)
5275 {
5276 PCRE2_SET_MATCH_LIMIT(dat_context, mid);
5277 }
5278 else
5279 {
5280 PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
5281 }
5282
5283 if ((pat_patctl.control & CTL_JITFAST) != 0)
5284 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5285 dat_datctl.options, match_data, PTR(dat_context));
5286 else
5287 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5288 dat_datctl.options, match_data, PTR(dat_context));
5289
5290 if (capcount == errnumber)
5291 {
5292 min = mid;
5293 mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
5294 }
5295 else if (capcount >= 0 ||
5296 capcount == PCRE2_ERROR_NOMATCH ||
5297 capcount == PCRE2_ERROR_PARTIAL)
5298 {
5299 if (mid == min + 1)
5300 {
5301 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
5302 break;
5303 }
5304 max = mid;
5305 mid = (min + mid)/2;
5306 }
5307 else break; /* Some other error */
5308 }
5309
5310 return capcount;
5311 }
5312
5313
5314
5315 /*************************************************
5316 * Callout function *
5317 *************************************************/
5318
5319 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
5320 we are in the match. Yield zero unless more callouts than the fail count, or
5321 the callout data is not zero. The only differences in the callout block for
5322 different code unit widths are that the pointers to the subject, the most
5323 recent MARK, and a callout argument string point to strings of the appropriate
5324 width. Casts can be used to deal with this.
5325
5326 Argument: a pointer to a callout block
5327 Return:
5328 */
5329
5330 static int
5331 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
5332 {
5333 uint32_t i, pre_start, post_start, subject_length;
5334 PCRE2_SIZE current_position;
5335 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5336 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
5337
5338 /* This FILE is used for echoing the subject. This is done only once in simple
5339 cases. */
5340
5341 FILE *f = (first_callout || callout_capture || cb->callout_string != NULL)?
5342 outfile : NULL;
5343
5344 /* For a callout with a string argument, show the string first because there
5345 isn't a tidy way to fit it in the rest of the data. */
5346
5347 if (cb->callout_string != NULL)
5348 {
5349 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
5350 fprintf(outfile, "Callout (%lu): %c",
5351 (unsigned long int)cb->callout_string_offset, delimiter);
5352 PCHARSV(cb->callout_string, 0,
5353 cb->callout_string_length, utf, outfile);
5354 for (i = 0; callout_start_delims[i] != 0; i++)
5355 if (delimiter == callout_start_delims[i])
5356 {
5357 delimiter = callout_end_delims[i];
5358 break;
5359 }
5360 fprintf(outfile, "%c", delimiter);
5361 if (!callout_capture) fprintf(outfile, "\n");
5362 }
5363
5364 /* Show captured strings if required */
5365
5366 if (callout_capture)
5367 {
5368 if (cb->callout_string == NULL)
5369 fprintf(outfile, "Callout %d:", cb->callout_number);
5370 fprintf(outfile, " last capture = %d\n", cb->capture_last);
5371 for (i = 2; i < cb->capture_top * 2; i += 2)
5372 {
5373 fprintf(outfile, "%2d: ", i/2);
5374 if (cb->offset_vector[i] == PCRE2_UNSET)
5375 fprintf(outfile, "<unset>");
5376 else
5377 {
5378 PCHARSV(cb->subject, cb->offset_vector[i],
5379 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
5380 }
5381 fprintf(outfile, "\n");
5382 }
5383 }
5384
5385 /* Re-print the subject in canonical form (with escapes for non-printing
5386 characters), the first time, or if giving full details. On subsequent calls in
5387 the same match, we use PCHARS() just to find the printed lengths of the
5388 substrings. */
5389
5390 if (f != NULL) fprintf(f, "--->");
5391
5392 /* The subject before the match start. */
5393
5394 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
5395
5396 /* If a lookbehind is involved, the current position may be earlier than the
5397 match start. If so, use the match start instead. */
5398
5399 current_position = (cb->current_position >= cb->start_match)?
5400 cb->current_position : cb->start_match;
5401
5402 /* The subject between the match start and the current position. */
5403
5404 PCHARS(post_start, cb->subject, cb->start_match,
5405 current_position - cb->start_match, utf, f);
5406
5407 /* Print from the current position to the end. */
5408
5409 PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
5410 utf, f);
5411
5412 /* Calculate the total subject printed length (no print). */
5413
5414 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
5415
5416 if (f != NULL) fprintf(f, "\n");
5417
5418 /* For automatic callouts, show the pattern offset. Otherwise, for a numerical
5419 callout whose number has not already been shown with captured strings, show the
5420 number here. A callout with a string argument has been displayed above. */
5421
5422 if (cb->callout_number == 255)
5423 {
5424 fprintf(outfile, "%+3d ", (int)cb->pattern_position);
5425 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
5426 }
5427 else
5428 {
5429 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
5430 else fprintf(outfile, "%3d ", cb->callout_number);
5431 }
5432
5433 /* Now show position indicators */
5434
5435 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
5436 fprintf(outfile, "^");
5437
5438 if (post_start > 0)
5439 {
5440 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
5441 fprintf(outfile, "^");
5442 }
5443
5444 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
5445 fprintf(outfile, " ");
5446
5447 if (cb->next_item_length != 0)
5448 fprintf(outfile, "%.*s", (int)(cb->next_item_length),
5449 pbuffer8 + cb->pattern_position);
5450
5451 fprintf(outfile, "\n");
5452 first_callout = FALSE;
5453
5454 if (cb->mark != last_callout_mark)
5455 {
5456 if (cb->mark == NULL)
5457 fprintf(outfile, "Latest Mark: <unset>\n");
5458 else
5459 {
5460 fprintf(outfile, "Latest Mark: ");
5461 PCHARSV(cb->mark, 0, -1, utf, outfile);
5462 putc('\n', outfile);
5463 }
5464 last_callout_mark = cb->mark;
5465 }
5466
5467 if (callout_data_ptr != NULL)
5468 {
5469 int callout_data = *((int32_t *)callout_data_ptr);
5470 if (callout_data != 0)
5471 {
5472 fprintf(outfile, "Callout data = %d\n", callout_data);
5473 return callout_data;
5474 }
5475 }
5476
5477 callout_count++;
5478
5479 if (cb->callout_number == dat_datctl.cerror[0] &&
5480 callout_count >= dat_datctl.cerror[1])
5481 return PCRE2_ERROR_CALLOUT;
5482
5483 if (cb->callout_number == dat_datctl.cfail[0] &&
5484 callout_count >= dat_datctl.cfail[1])
5485 return 1;
5486
5487 return 0;
5488 }
5489
5490
5491
5492 /*************************************************
5493 * Handle *MARK and copy/get tests *
5494 *************************************************/
5495
5496 /* This function is called after complete and partial matches. It runs the
5497 tests for substring extraction.
5498
5499 Arguments:
5500 utf TRUE for utf
5501 capcount return from pcre2_match()
5502
5503 Returns: nothing
5504 */
5505
5506 static void
5507 copy_and_get(BOOL utf, int capcount)
5508 {
5509 int i;
5510 uint8_t *nptr;
5511
5512 /* Test copy strings by number */
5513
5514 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
5515 {
5516 int rc;
5517 PCRE2_SIZE length, length2;
5518 uint32_t copybuffer[256];
5519 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
5520 length = sizeof(copybuffer)/code_unit_size;
5521 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
5522 if (rc < 0)
5523 {
5524 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
5525 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5526 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5527 fprintf(outfile, "\n");
5528 }
5529 else
5530 {
5531 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
5532 if (rc < 0)
5533 {
5534 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
5535 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5536 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5537 fprintf(outfile, "\n");
5538 }
5539 else if (length2 != length)
5540 {
5541 fprintf(outfile, "Mismatched substring lengths: %lu %lu\n",
5542 (unsigned long int)length, (unsigned long int)length2);
5543 }
5544 fprintf(outfile, "%2dC ", n);
5545 PCHARSV(copybuffer, 0, length, utf, outfile);
5546 fprintf(outfile, " (%lu)\n", (unsigned long)length);
5547 }
5548 }
5549
5550 /* Test copy strings by name */
5551
5552 nptr = dat_datctl.copy_names;
5553 for (;;)
5554 {
5555 int rc;
5556 int groupnumber;
5557 PCRE2_SIZE length, length2;
5558 uint32_t copybuffer[256];
5559 int namelen = strlen((const char *)nptr);
5560 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
5561 PCRE2_SIZE cnl = namelen;
5562 #endif
5563 if (namelen == 0) break;
5564
5565 #ifdef SUPPORT_PCRE2_8
5566 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
5567 #endif
5568 #ifdef SUPPORT_PCRE2_16
5569 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
5570 #endif
5571 #ifdef SUPPORT_PCRE2_32
5572 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
5573 #endif
5574
5575 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
5576 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
5577 fprintf(outfile, "Number not found for group '%s'\n", nptr);
5578
5579 length = sizeof(copybuffer)/code_unit_size;
5580 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
5581 if (rc < 0)
5582 {
5583 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
5584 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5585 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5586 fprintf(outfile, "\n");
5587 }
5588 else
5589 {
5590 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
5591 if (rc < 0)
5592 {
5593 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
5594 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5595 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5596 fprintf(outfile, "\n");
5597 }
5598 else if (length2 != length)
5599 {
5600 fprintf(outfile, "Mismatched substring lengths: %lu %lu\n",
5601 (unsigned long int)length, (unsigned long int)length2);
5602 }
5603 fprintf(outfile, " C ");
5604 PCHARSV(copybuffer, 0, length, utf, outfile);
5605 fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr);
5606 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
5607 else fprintf(outfile, " (non-unique)\n");
5608 }
5609 nptr += namelen + 1;
5610 }
5611
5612 /* Test get strings by number */
5613
5614 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
5615 {
5616 int rc;
5617 PCRE2_SIZE length;
5618 void *gotbuffer;
5619 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
5620 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
5621 if (rc < 0)
5622 {
5623 fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
5624 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5625 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5626 fprintf(outfile, "\n");
5627 }
5628 else
5629 {
5630 fprintf(outfile, "%2dG ", n);
5631 PCHARSV(gotbuffer, 0, length, utf, outfile);
5632 fprintf(outfile, " (%lu)\n", (unsigned long)length);
5633 PCRE2_SUBSTRING_FREE(gotbuffer);
5634 }
5635 }
5636
5637 /* Test get strings by name */
5638
5639 nptr = dat_datctl.get_names;
5640 for (;;)
5641 {
5642 PCRE2_SIZE length;
5643 void *gotbuffer;
5644 int rc;
5645 int groupnumber;
5646 int namelen = strlen((const char *)nptr);
5647 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
5648 PCRE2_SIZE cnl = namelen;
5649 #endif
5650 if (namelen == 0) break;
5651
5652 #ifdef SUPPORT_PCRE2_8
5653 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
5654 #endif
5655 #ifdef SUPPORT_PCRE2_16
5656 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
5657 #endif
5658 #ifdef SUPPORT_PCRE2_32
5659 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
5660 #endif
5661
5662 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
5663 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
5664 fprintf(outfile, "Number not found for group '%s'\n", nptr);
5665
5666 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
5667 if (rc < 0)
5668 {
5669 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
5670 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5671 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5672 fprintf(outfile, "\n");
5673 }
5674 else
5675 {
5676 fprintf(outfile, " G ");
5677 PCHARSV(gotbuffer, 0, length, utf, outfile);
5678 fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr);
5679 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
5680 else fprintf(outfile, " (non-unique)\n");
5681 PCRE2_SUBSTRING_FREE(gotbuffer);
5682 }
5683 nptr += namelen + 1;
5684 }
5685
5686 /* Test getting the complete list of captured strings. */
5687
5688 if ((dat_datctl.control & CTL_GETALL) != 0)
5689 {
5690 int rc;
5691 void **stringlist;
5692 PCRE2_SIZE *lengths;
5693 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
5694 if (rc < 0)
5695 {
5696 fprintf(outfile, "get substring list failed (%d): ", rc);
5697 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5698 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5699 fprintf(outfile, "\n");
5700 }
5701 else
5702 {
5703 for (i = 0; i < capcount; i++)
5704 {
5705 fprintf(outfile, "%2dL ", i);
5706 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
5707 putc('\n', outfile);
5708 }
5709 if (stringlist[i] != NULL)
5710 fprintf(outfile, "string list not terminated by NULL\n");
5711 PCRE2_SUBSTRING_LIST_FREE(stringlist);
5712 }
5713 }
5714 }
5715
5716
5717
5718 /*************************************************
5719 * Process a data line *
5720 *************************************************/
5721
5722 /* The line is in buffer; it will not be empty.
5723
5724 Arguments: none
5725
5726 Returns: PR_OK continue processing next line
5727 PR_SKIP skip to a blank line
5728 PR_ABEND abort the pcre2test run
5729 */
5730
5731 static int
5732 process_data(void)
5733 {
5734 PCRE2_SIZE len, ulen, arg_ulen;
5735 uint32_t gmatched;
5736 uint32_t c, k;
5737 uint32_t g_notempty = 0;
5738 uint8_t *p, *pp, *start_rep;
5739 size_t needlen;
5740 void *use_dat_context;
5741 BOOL utf;
5742
5743 #ifdef SUPPORT_PCRE2_8
5744 uint8_t *q8 = NULL;
5745 #endif
5746 #ifdef SUPPORT_PCRE2_16
5747 uint16_t *q16 = NULL;
5748 #endif
5749 #ifdef SUPPORT_PCRE2_32
5750 uint32_t *q32 = NULL;
5751 #endif
5752
5753 /* Copy the default context and data control blocks to the active ones. Then
5754 copy from the pattern the controls that can be set in either the pattern or the
5755 data. This allows them to be overridden in the data line. We do not do this for
5756 options because those that are common apply separately to compiling and
5757 matching. */
5758
5759 DATCTXCPY(dat_context, default_dat_context);
5760 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
5761 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
5762 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
5763 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
5764
5765 /* Initialize for scanning the data line. */
5766
5767 #ifdef SUPPORT_PCRE2_8
5768 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
5769 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
5770 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
5771 #else
5772 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5773 #endif
5774
5775 start_rep = NULL;
5776 len = strlen((const char *)buffer);
5777 while (len > 0 && isspace(buffer[len-1])) len--;
5778 buffer[len] = 0;
5779 p = buffer;
5780 while (isspace(*p)) p++;
5781
5782 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
5783 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
5784
5785 if (utf)
5786 {
5787 uint8_t *q;
5788 uint32_t cc;
5789 int n = 1;
5790 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
5791 if (n <= 0)
5792 {
5793 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
5794 "in UTF mode\n");
5795 return PR_OK;
5796 }
5797 }
5798
5799 #ifdef SUPPORT_VALGRIND
5800 /* Mark the dbuffer as addressable but undefined again. */
5801 if (dbuffer != NULL)
5802 {
5803 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
5804 }
5805 #endif
5806
5807 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
5808 the number of code units that will be needed (though the buffer may have to be
5809 extended if replication is involved). */
5810
5811 needlen = (size_t)((len+1) * code_unit_size);
5812 if (dbuffer == NULL || needlen >= dbuffer_size)
5813 {
5814 while (needlen >= dbuffer_size) dbuffer_size *= 2;
5815 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
5816 if (dbuffer == NULL)
5817 {
5818 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
5819 exit(1);
5820 }
5821 }
5822 SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */
5823
5824 /* Scan the data line, interpreting data escapes, and put the result into a
5825 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
5826 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
5827 */
5828
5829 while ((c = *p++) != 0)
5830 {
5831 int32_t i = 0;
5832 size_t replen;
5833
5834 /* ] may mark the end of a replicated sequence */
5835
5836 if (c == ']' && start_rep != NULL)
5837 {
5838 long li;
5839 char *endptr;
5840 size_t qoffset = CAST8VAR(q) - dbuffer;
5841 size_t rep_offset = start_rep - dbuffer;
5842
5843 if (*p++ != '{')
5844 {
5845 fprintf(outfile, "** Expected '{' after \\[....]\n");
5846 return PR_OK;
5847 }
5848
5849 li = strtol((const char *)p, &endptr, 10);
5850 if (S32OVERFLOW(li))
5851 {
5852 fprintf(outfile, "** Repeat count too large\n");
5853 return PR_OK;
5854 }
5855
5856 p = (uint8_t *)endptr;
5857 if (*p++ != '}')
5858 {
5859 fprintf(outfile, "** Expected '}' after \\[...]{...\n");
5860 return PR_OK;
5861 }
5862
5863 i = (int32_t)li;
5864 if (i-- == 0)
5865 {
5866 fprintf(outfile, "** Zero repeat not allowed\n");
5867 return PR_OK;
5868 }
5869
5870 replen = CAST8VAR(q) - start_rep;
5871 needlen += replen * i;
5872
5873 if (needlen >= dbuffer_size)
5874 {
5875 while (needlen >= dbuffer_size) dbuffer_size *= 2;
5876 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
5877 if (dbuffer == NULL)
5878 {
5879 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
5880 exit(1);
5881 }
5882 SETCASTPTR(q, dbuffer + qoffset);
5883 start_rep = dbuffer + rep_offset;
5884 }
5885
5886 while (i-- > 0)
5887 {
5888 memcpy(CAST8VAR(q), start_rep, replen);
5889 SETPLUS(q, replen/code_unit_size);
5890 }
5891
5892 start_rep = NULL;
5893 continue;
5894 }
5895
5896 /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
5897 set, do the fudge for setting the top bit. */
5898
5899 if (c != '\\')
5900 {
5901 uint32_t topbit = 0;
5902 if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
5903 {
5904 topbit = 0x80000000;
5905 c = *p++;
5906 }
5907 if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
5908 HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
5909 c |= topbit;
5910 }
5911
5912 /* Handle backslash escapes */
5913
5914 else switch ((c = *p++))
5915 {
5916 case '\\': break;
5917 case 'a': c = CHAR_BEL; break;
5918 case 'b': c = '\b'; break;
5919 case 'e': c = CHAR_ESC; break;
5920 case 'f': c = '\f'; break;
5921 case 'n': c = '\n'; break;
5922 case 'r': c = '\r'; break;
5923 case 't': c = '\t'; break;
5924 case 'v': c = '\v'; break;
5925
5926 case '0': case '1': case '2': case '3':
5927 case '4': case '5': case '6': case '7':
5928 c -= '0';
5929 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
5930 c = c * 8 + *p++ - '0';
5931 break;
5932
5933 case 'o':
5934 if (*p == '{')
5935 {
5936 uint8_t *pt = p;
5937 c = 0;
5938 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
5939 {
5940 if (++i == 12)
5941 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
5942 "using only the first twelve.\n");
5943 else c = c * 8 + *pt - '0';
5944 }
5945 if (*pt == '}') p = pt + 1;
5946 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
5947 }
5948 break;
5949
5950 case 'x':
5951 if (*p == '{')
5952 {
5953 uint8_t *pt = p;
5954 c = 0;
5955
5956 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
5957 when isxdigit() is a macro that refers to its argument more than
5958 once. This is banned by the C Standard, but apparently happens in at
5959 least one MacOS environment. */
5960
5961 for (pt++; isxdigit(*pt); pt++)
5962 {
5963 if (++i == 9)
5964 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
5965 "using only the first eight.\n");
5966 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
5967 }
5968 if (*pt == '}')
5969 {
5970 p = pt + 1;
5971 break;
5972 }
5973 /* Not correct form for \x{...}; fall through */
5974 }
5975
5976 /* \x without {} always defines just one byte in 8-bit mode. This
5977 allows UTF-8 characters to be constructed byte by byte, and also allows
5978 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
5979 Otherwise, pass it down as data. */
5980
5981 c = 0;
5982 while (i++ < 2 && isxdigit(*p))
5983 {
5984 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
5985 p++;
5986 }
5987 #if defined SUPPORT_PCRE2_8
5988 if (utf && (test_mode == PCRE8_MODE))
5989 {
5990 *q8++ = c;
5991 continue;
5992 }
5993 #endif
5994 break;
5995
5996 case 0: /* \ followed by EOF allows for an empty line */
5997 p--;
5998 continue;
5999
6000 case '=': /* \= terminates the data, starts modifiers */
6001 goto ENDSTRING;
6002
6003 case '[': /* \[ introduces a replicated character sequence */
6004 if (start_rep != NULL)
6005 {
6006 fprintf(outfile, "** Nested replication is not supported\n");
6007 return PR_OK;
6008 }
6009 start_rep = CAST8VAR(q);
6010 continue;
6011
6012 default:
6013 if (isalnum(c))
6014 {
6015 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6016 return PR_OK;
6017 }
6018 }
6019
6020 /* We now have a character value in c that may be greater than 255.
6021 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6022 than 127 in UTF mode must have come from \x{...} or octal constructs
6023 because values from \x.. get this far only in non-UTF mode. */
6024
6025 #ifdef SUPPORT_PCRE2_8
6026 if (test_mode == PCRE8_MODE)
6027 {
6028 if (utf)
6029 {
6030 if (c > 0x7fffffff)
6031 {
6032 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6033 "and so cannot be converted to UTF-8\n", c);
6034 return PR_OK;
6035 }
6036 q8 += ord2utf8(c, q8);
6037 }
6038 else
6039 {
6040 if (c > 0xffu)
6041 {
6042 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6043 "and UTF-8 mode is not enabled.\n", c);
6044 fprintf(outfile, "** Truncation will probably give the wrong "
6045 "result.\n");
6046 }
6047 *q8++ = c;
6048 }
6049 }
6050 #endif
6051 #ifdef SUPPORT_PCRE2_16
6052 if (test_mode == PCRE16_MODE)
6053 {
6054 if (utf)
6055 {
6056 if (c > 0x10ffffu)
6057 {
6058 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6059 "0x10ffff and so cannot be converted to UTF-16\n", c);
6060 return PR_OK;
6061 }
6062 else if (c >= 0x10000u)
6063 {
6064 c-= 0x10000u;
6065 *q16++ = 0xD800 | (c >> 10);
6066 *q16++ = 0xDC00 | (c & 0x3ff);
6067 }
6068 else
6069 *q16++ = c;
6070 }
6071 else
6072 {
6073 if (c > 0xffffu)
6074 {
6075 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6076 "and UTF-16 mode is not enabled.\n", c);
6077 fprintf(outfile, "** Truncation will probably give the wrong "
6078 "result.\n");
6079 }
6080
6081 *q16++ = c;
6082 }
6083 }
6084 #endif
6085 #ifdef SUPPORT_PCRE2_32
6086 if (test_mode == PCRE32_MODE)
6087 {
6088 *q32++ = c;
6089 }
6090 #endif
6091 }
6092
6093 ENDSTRING:
6094 SET(*q, 0);
6095 len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
6096 ulen = len/code_unit_size; /* Length in code units */
6097 arg_ulen = ulen; /* Value to use in match arg */
6098
6099 /* If the string was terminated by \= we must now interpret modifiers. */
6100
6101 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
6102 return PR_OK;
6103
6104 /* Check for mutually exclusive modifiers. At present, these are all in the
6105 first control word. */
6106
6107 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
6108 {
6109 c = dat_datctl.control & exclusive_dat_controls[k];
6110 if (c != 0 && c != (c & (~c+1)))
6111 {
6112 show_controls(c, 0, "** Not allowed together:");
6113 fprintf(outfile, "\n");
6114 return PR_OK;
6115 }
6116 }
6117
6118 if (pat_patctl.replacement[0] != 0 &&
6119 (dat_datctl.control & CTL_NULLCONTEXT) != 0)
6120 {
6121 fprintf(outfile, "** Replacement text is not supported with null_context.\n");
6122 return PR_OK;
6123 }
6124
6125 /* We now have the subject in dbuffer, with len containing the byte length, and
6126 ulen containing the code unit length, with a copy in arg_ulen for use in match
6127 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
6128 zero_terminate modifier is present).
6129
6130 Move the data to the end of the buffer so that a read over the end can be
6131 caught by valgrind or other means. If we have explicit valgrind support, mark
6132 the unused start of the buffer unaddressable. If we are using the POSIX
6133 interface, or testing zero-termination, we must include the terminating zero in
6134 the usable data. */
6135
6136 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
6137 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
6138 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
6139 #ifdef SUPPORT_VALGRIND
6140 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
6141 #endif
6142
6143 /* Now pp points to the subject string. POSIX matching is only possible in
6144 8-bit mode, and it does not support timing or other fancy features. Some were
6145 checked at compile time, but we need to check the match-time settings here. */
6146
6147 #ifdef SUPPORT_PCRE2_8
6148 if ((pat_patctl.control & CTL_POSIX) != 0)
6149 {
6150 int rc;
6151 int eflags = 0;
6152 regmatch_t *pmatch = NULL;
6153 const char *msg = "** Ignored with POSIX interface:";
6154
6155 if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
6156 prmsg(&msg, "callout_error");
6157 if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
6158 prmsg(&msg, "callout_fail");
6159 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
6160 prmsg(&msg, "copy");
6161 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
6162 prmsg(&msg, "get");
6163 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
6164 if (dat_datctl.offset != 0) prmsg(&msg, "offset");
6165
6166 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
6167 {
6168 fprintf(outfile, "%s", msg);
6169 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
6170 msg = "";
6171 }
6172 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
6173 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
6174 {
6175 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
6176 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
6177 msg = "";
6178 }
6179
6180 if (msg[0] == 0) fprintf(outfile, "\n");
6181
6182 if (dat_datctl.oveccount > 0)
6183 {
6184 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
6185 if (pmatch == NULL)
6186 {
6187 fprintf(outfile, "** Failed to get memory for recording matching "
6188 "information (size set = %du)\n", dat_datctl.oveccount);
6189 return PR_OK;
6190 }
6191 }
6192
6193 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
6194 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
6195 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
6196
6197 rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
6198 if (rc != 0)
6199 {
6200 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
6201 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
6202 }
6203 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
6204 fprintf(outfile, "Matched with REG_NOSUB\n");
6205 else if (dat_datctl.oveccount == 0)
6206 fprintf(outfile, "Matched without capture\n");
6207 else
6208 {
6209 size_t i;
6210 for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
6211 {
6212 if (pmatch[i].rm_so >= 0)
6213 {
6214 PCRE2_SIZE start = pmatch[i].rm_so;
6215 PCRE2_SIZE end = pmatch[i].rm_eo;
6216 if (start > end)
6217 {
6218 start = pmatch[i].rm_eo;
6219 end = pmatch[i].rm_so;
6220 fprintf(outfile, "Start of matched string is beyond its end - "
6221 "displaying from end to start.\n");
6222 }
6223 fprintf(outfile, "%2d: ", (int)i);
6224 PCHARSV(pp, start, end - start, utf, outfile);
6225 fprintf(outfile, "\n");
6226
6227 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
6228 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
6229 {
6230 fprintf(outfile, "%2d+ ", (int)i);
6231 /* Note: don't use the start/end variables here because we want to
6232 show the text from what is reported as the end. */
6233 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
6234 fprintf(outfile, "\n"); }
6235 }
6236 }
6237 }
6238 free(pmatch);
6239 return PR_OK;
6240 }
6241 #endif /* SUPPORT_PCRE2_8 */
6242
6243 /* Handle matching via the native interface. Check for consistency of
6244 modifiers. */
6245
6246 if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS))
6247 {
6248 fprintf(outfile, "** Finding match limits is not relevant for DFA matching: ignored\n");
6249 dat_datctl.control &= ~CTL_FINDLIMITS;
6250 }
6251
6252 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
6253 matching, even if the JIT compiler was used. */
6254
6255 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
6256 FLD(compiled_code, executable_jit) != NULL)
6257 {
6258 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
6259 dat_datctl.control &= ~CTL_ALLUSEDTEXT;
6260 }
6261
6262 /* Handle passing the subject as zero-terminated. */
6263
6264 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
6265 arg_ulen = PCRE2_ZERO_TERMINATED;
6266
6267 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
6268 NULL context. */
6269
6270 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
6271 NULL : PTR(dat_context);
6272
6273 /* Enable display of malloc/free if wanted. */
6274
6275 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
6276
6277 /* Create and assign a JIT stack if requested. */
6278
6279 if (dat_datctl.jitstack != 0)
6280 {
6281 if (dat_datctl.jitstack != jit_stack_size)
6282 {
6283 PCRE2_JIT_STACK_FREE(jit_stack);
6284 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
6285 jit_stack_size = dat_datctl.jitstack;
6286 }
6287 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
6288 }
6289
6290 /* Or de-assign */
6291
6292 else if (jit_stack != NULL)
6293 {
6294 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
6295 PCRE2_JIT_STACK_FREE(jit_stack);
6296 jit_stack = NULL;
6297 jit_stack_size = 0;
6298 }
6299
6300 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
6301 if we want to verify that JIT was actually used. */
6302
6303 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
6304 {
6305 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
6306 }
6307
6308 /* Adjust match_data according to size of offsets required. A size of zero
6309 causes a new match data block to be obtained that exactly fits the pattern. */
6310
6311 if (dat_datctl.oveccount == 0)
6312 {
6313 PCRE2_MATCH_DATA_FREE(match_data);
6314 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
6315 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
6316 }
6317 else if (dat_datctl.oveccount <= max_oveccount)
6318 {
6319 SETFLD(match_data, oveccount, dat_datctl.oveccount);
6320 }
6321 else
6322 {
6323 max_oveccount = dat_datctl.oveccount;
6324 PCRE2_MATCH_DATA_FREE(match_data);
6325 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
6326 }
6327
6328 if (CASTVAR(void *, match_data) == NULL)
6329 {
6330 fprintf(outfile, "** Failed to get memory for recording matching "
6331 "information (size requested: %d)\n", dat_datctl.oveccount);
6332 max_oveccount = 0;
6333 return PR_OK;
6334 }
6335
6336 /* Replacement processing is ignored for DFA matching. */
6337
6338 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
6339 {
6340 fprintf(outfile, "** Ignored for DFA matching: replace\n");
6341 dat_datctl.replacement[0] = 0;
6342 }
6343
6344 /* If a replacement string is provided, call pcre2_substitute() instead of one
6345 of the matching functions. First we have to convert the replacement string to
6346 the appropriate width. */
6347
6348 if (dat_datctl.replacement[0] != 0)
6349 {
6350 int rc;
6351 uint8_t *pr;
6352 uint8_t rbuffer[REPLACE_BUFFSIZE];
6353 uint8_t nbuffer[REPLACE_BUFFSIZE];
6354 uint32_t xoptions;
6355 PCRE2_SIZE rlen, nsize, erroroffset;
6356 BOOL badutf = FALSE;
6357
6358 #ifdef SUPPORT_PCRE2_8
6359 uint8_t *r8 = NULL;
6360 #endif
6361 #ifdef SUPPORT_PCRE2_16
6362 uint16_t *r16 = NULL;
6363 #endif
6364 #ifdef SUPPORT_PCRE2_32
6365 uint32_t *r32 = NULL;
6366 #endif
6367
6368 if (timeitm)
6369 fprintf(outfile, "** Timing is not supported with replace: ignored\n");
6370
6371 xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
6372 PCRE2_SUBSTITUTE_GLOBAL) |
6373 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
6374 PCRE2_SUBSTITUTE_EXTENDED) |
6375 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
6376 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
6377 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
6378 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
6379 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
6380 PCRE2_SUBSTITUTE_UNSET_EMPTY);
6381
6382 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
6383 pr = dat_datctl.replacement;
6384
6385 /* If the replacement starts with '[<number>]' we interpret that as length
6386 value for the replacement buffer. */
6387
6388 nsize = REPLACE_BUFFSIZE/code_unit_size;
6389 if (*pr == '[')
6390 {
6391 PCRE2_SIZE n = 0;
6392 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
6393 if (*pr++ != ']')
6394 {
6395 fprintf(outfile, "Bad buffer size in replacement string\n");
6396 return PR_OK;
6397 }
6398 if (n > nsize)
6399 {
6400 fprintf(outfile, "Replacement buffer setting (%lu) is too large "
6401 "(max %lu)\n", (unsigned long int)n, (unsigned long int)nsize);
6402 return PR_OK;
6403 }
6404 nsize = n;
6405 }
6406
6407 /* Now copy the replacement string to a buffer of the appropriate width. No
6408 escape processing is done for replacements. In UTF mode, check for an invalid
6409 UTF-8 input string, and if it is invalid, just copy its code units without
6410 UTF interpretation. This provides a means of checking that an invalid string
6411 is detected. Otherwise, UTF-8 can be used to include wide characters in a
6412 replacement. */
6413
6414 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
6415
6416 /* Not UTF or invalid UTF-8: just copy the code units. */
6417
6418 if (!utf || badutf)
6419 {
6420 while ((c = *pr++) != 0)
6421 {
6422 #ifdef SUPPORT_PCRE2_8
6423 if (test_mode == PCRE8_MODE) *r8++ = c;
6424 #endif
6425 #ifdef SUPPORT_PCRE2_16
6426 if (test_mode == PCRE16_MODE) *r16++ = c;
6427 #endif
6428 #ifdef SUPPORT_PCRE2_32
6429 if (test_mode == PCRE32_MODE) *r32++ = c;
6430 #endif
6431 }
6432 }
6433
6434 /* Valid UTF-8 replacement string */
6435
6436 else while ((c = *pr++) != 0)
6437 {
6438 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
6439
6440 #ifdef SUPPORT_PCRE2_8
6441 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
6442 #endif
6443
6444 #ifdef SUPPORT_PCRE2_16
6445 if (test_mode == PCRE16_MODE)
6446 {
6447 if (c >= 0x10000u)
6448 {
6449 c-= 0x10000u;
6450 *r16++ = 0xD800 | (c >> 10);
6451 *r16++ = 0xDC00 | (c & 0x3ff);
6452 }
6453 else *r16++ = c;
6454 }
6455 #endif
6456
6457 #ifdef SUPPORT_PCRE2_32
6458 if (test_mode == PCRE32_MODE) *r32++ = c;
6459 #endif
6460 }
6461
6462 SET(*r, 0);
6463 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
6464 rlen = PCRE2_ZERO_TERMINATED;
6465 else
6466 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
6467 PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
6468 dat_datctl.options|xoptions, match_data, dat_context,
6469 rbuffer, rlen, nbuffer, &nsize);
6470
6471 if (rc < 0)
6472 {
6473 PCRE2_SIZE msize;
6474 fprintf(outfile, "Failed: error %d", rc);
6475 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
6476 fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
6477 fprintf(outfile, ": ");
6478 PCRE2_GET_ERROR_MESSAGE(msize, rc, pbuffer);
6479 PCHARSV(CASTVAR(void *, pbuffer), 0, msize, FALSE, outfile);
6480 if (rc == PCRE2_ERROR_NOMEMORY &&
6481 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
6482 fprintf(outfile, ": %ld code units are needed", (long int)nsize);
6483 }
6484 else
6485 {
6486 fprintf(outfile, "%2d: ", rc);
6487 PCHARSV(nbuffer, 0, nsize, utf, outfile);
6488 }
6489
6490 fprintf(outfile, "\n");
6491 } /* End of substitution handling */
6492
6493 /* When a replacement string is not provided, run a loop for global matching
6494 with one of the basic matching functions. */
6495
6496 else for (gmatched = 0;; gmatched++)
6497 {
6498 PCRE2_SIZE j;
6499 int capcount;
6500 PCRE2_SIZE *ovector;
6501 PCRE2_SIZE ovecsave[2];
6502
6503 ovector = FLD(match_data, ovector);
6504
6505 /* After the first time round a global loop, for a normal global (/g)
6506 iteration, save the current ovector[0,1] so that we can check that they do
6507 change each time. Otherwise a matching bug that returns the same string
6508 causes an infinite loop. It has happened! */
6509
6510 if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0)
6511 {
6512 ovecsave[0] = ovector[0];
6513 ovecsave[1] = ovector[1];
6514 }
6515
6516 /* For altglobal (or first time round the loop), set an "unset" value. */
6517
6518 else ovecsave[0] = ovecsave[1] = PCRE2_UNSET;
6519
6520 /* Fill the ovector with junk to detect elements that do not get set
6521 when they should be. */
6522
6523 for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
6524
6525 /* When matching is via pcre2_match(), we will detect the use of JIT via the
6526 stack callback function. */
6527
6528 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
6529
6530 /* Do timing if required. */
6531
6532 if (timeitm > 0)
6533 {
6534 int i;
6535 clock_t start_time, time_taken;
6536
6537 if ((dat_datctl.control & CTL_DFA) != 0)
6538 {
6539 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
6540 {
6541 fprintf(outfile, "Timing DFA restarts is not supported\n");
6542 return PR_OK;
6543 }
6544 if (dfa_workspace == NULL)
6545 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6546 start_time = clock();
6547 for (i = 0; i < timeitm; i++)
6548 {
6549 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
6550 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6551 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
6552 }
6553 }
6554
6555 else if ((pat_patctl.control & CTL_JITFAST) != 0)
6556 {
6557 start_time = clock();
6558 for (i = 0; i < timeitm; i++)
6559 {
6560 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
6561 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6562 use_dat_context);
6563 }
6564 }
6565
6566 else
6567 {
6568 start_time = clock();
6569 for (i = 0; i < timeitm; i++)
6570 {
6571 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
6572 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6573 use_dat_context);
6574 }
6575 }
6576 total_match_time += (time_taken = clock() - start_time);
6577 fprintf(outfile, "Match time %.4f milliseconds\n",
6578 (((double)time_taken * 1000.0) / (double)timeitm) /
6579 (double)CLOCKS_PER_SEC);
6580 }
6581
6582 /* Find the match and depth limits if requested. The depth limit
6583 is not relevant for JIT. */
6584
6585 if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
6586 {
6587 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT, "match");
6588 if (FLD(compiled_code, executable_jit) == NULL)
6589 (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
6590 "depth");
6591 }
6592
6593 /* Otherwise just run a single match, setting up a callout if required (the
6594 default). There is a copy of the pattern in pbuffer8 for use by callouts. */
6595
6596 else
6597 {
6598 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
6599 {
6600 PCRE2_SET_CALLOUT(dat_context, callout_function,
6601 (void *)(&dat_datctl.callout_data));
6602 first_callout = TRUE;
6603 last_callout_mark = NULL;
6604 callout_count = 0;
6605 }
6606 else
6607 {
6608