/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1371 - (show annotations)
Fri Oct 11 10:59:41 2013 UTC (6 years, 1 month ago) by zherczeg
File MIME type: text/plain
File size: 70260 byte(s)
Support OP_DNREF and OP_DNREFI in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
/* Flag constant occupying bit 31 only.
   NOTE(review): no use of PCRE_BUG is visible in this part of the file -
   confirm where it is consumed before changing it. */
54 #define PCRE_BUG 0x80000000
55
56 /*
57 Letter characters:
58 \xe6\x92\xad = 0x64ad = 25773 (kanji)
59 Non-letter characters:
60 \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
61 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64 Newlines:
65 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
66 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67 Othercase pairs:
68 \xc3\xa9 = 0xe9 = 233 (e')
69 \xc3\x89 = 0xc9 = 201 (E')
70 \xc3\xa1 = 0xe1 = 225 (a')
71 \xc3\x81 = 0xc1 = 193 (A')
72 \xc8\xba = 0x23a = 570
73 \xe2\xb1\xa5 = 0x2c65 = 11365
74 \xe1\xbd\xb8 = 0x1f78 = 8056
75 \xe1\xbf\xb8 = 0x1ff8 = 8184
76 \xf0\x90\x90\x80 = 0x10400 = 66560
77 \xf0\x90\x90\xa8 = 0x10428 = 66600
78 Mark property:
79 \xcc\x8d = 0x30d = 781
80 Special:
81 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86 */
87
/* Forward declaration: main() hands control to this routine once JIT
   support has been confirmed. */
static int regression_tests(void);

/*
 * Program entry point.
 *
 * Queries the first configured library width (8-bit, else 16-bit, else
 * 32-bit) for PCRE_CONFIG_JIT. When JIT is reported as unavailable a notice
 * is printed and the process exits with status 1; otherwise the exit status
 * is whatever regression_tests() returns.
 */
int main(void)
{
	/* Stays 0 unless one of the config calls below reports JIT support. */
	int jit_available = 0;

#if defined SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE16
	pcre16_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE32
	pcre32_config(PCRE_CONFIG_JIT, &jit_available);
#endif

	if (jit_available) {
		return regression_tests();
	}

	printf("JIT must be enabled to run pcre_jit_test\n");
	return 1;
}
106
107 /* --------------------------------------------------------------------------------------- */
108
/* Stop the build when no library width is configured: main() above can only
   query JIT support through the 8-, 16- or 32-bit pcre*_config() call. */
109 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
110 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111 #endif
112
/* Shorthand names for the option sets used in the first column of the test
   table. Each letter of a name stands for one option in the combination:
     C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
     A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
113 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
114 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
115 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
116 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
117 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
118 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
119 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
120
/* Per-test control bits. In the test table they are ORed into the second
   initialiser of an entry (the start_offset member), e.g. "0 | F_NOMATCH".
   OFFSET_MASK covers the low 16 bits and every F_* value lies above it.
   NOTE(review): presumably the driver applies OFFSET_MASK to start_offset to
   recover the numeric offset - confirm against the code that reads it.
   NOTE(review): F_NO32 has the same value as F_NO16 (0x020000), so the two
   flags cannot be told apart when tested - confirm this is intended. */
121 #define OFFSET_MASK 0x00ffff
122 #define F_NO8 0x010000
123 #define F_NO16 0x020000
124 #define F_NO32 0x020000
125 #define F_NOMATCH 0x040000
126 #define F_DIFF 0x080000
127 #define F_FORCECONV 0x100000
128 #define F_PROPERTY 0x200000
129 #define F_STUDY 0x400000
130
/* One entry of the regression test table: the options, a start offset, the
   pattern and the subject text. The table initialises entries positionally,
   so the order of the members must not change. */
131 struct regression_test_case {
/* Option bits for the case; the table supplies PCRE_* values or the
   MUA/CMA-style combinations here. */
132 int flags;
/* Start offset for the case. The table also ORs F_* control bits such as
   F_NOMATCH, F_DIFF and F_PROPERTY into this member. */
133 int start_offset;
/* Regular expression text. */
134 const char *pattern;
/* Subject text for the pattern. NOTE(review): the code that runs the
   match is outside this view - confirm how input is consumed. */
135 const char *input;
136 };
137
138 static struct regression_test_case regression_test_cases[] = {
139 /* Constant strings. */
140 { MUA, 0, "AbC", "AbAbC" },
141 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
142 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
143 { MA, 0, "[^a]", "aAbB" },
144 { CMA, 0, "[^m]", "mMnN" },
145 { MA, 0, "a[^b][^#]", "abacd" },
146 { CMA, 0, "A[^B][^E]", "abacd" },
147 { CMUA, 0, "[^x][^#]", "XxBll" },
148 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
149 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
150 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
151 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
152 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
153 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
154 { MUA, 0, "[axd]", "sAXd" },
155 { CMUA, 0, "[axd]", "sAXd" },
156 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
157 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
158 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
159 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
161 { MUA, 0, "[^a]", "\xc2\x80[]" },
162 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
163 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
164 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
165 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
166 { PCRE_CASELESS, 0, "a1", "Aa1" },
167 { MA, 0, "\\Ca", "cda" },
168 { CMA, 0, "\\Ca", "CDA" },
169 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
170 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
171 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
174 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
175
176 /* Assertions. */
177 { MUA, 0, "\\b[^A]", "A_B#" },
178 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
179 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
180 { MAP, 0, "\\B", "_\xa1" },
181 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
182 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
183 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
184 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
185 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
186 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
187 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
188 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
189 { MA, 1 | F_NOMATCH, "^", "\n" },
190 { 0, 0, "^ab", "ab" },
191 { 0, 0 | F_NOMATCH, "^ab", "aab" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
197 { 0, 0, "ab$", "ab" },
198 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
199 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
200 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
201 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
202 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
203 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
204 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
205 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
206 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
207 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
208 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
209 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
210 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
211 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
212 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
213 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
214 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
215 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
218 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
219 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
220 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
225 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
226 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
227 { MA, 0, "\\Aa", "aaa" },
228 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
229 { MA, 1, "\\Ga", "aaa" },
230 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
231 { MA, 0, "a\\z", "aaa" },
232 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
233
234 /* Brackets. */
235 { MUA, 0, "(ab|bb|cd)", "bacde" },
236 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
237 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
238 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
239 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
240 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
241
242 /* Greedy and non-greedy ? operators. */
243 { MUA, 0, "(?:a)?a", "laab" },
244 { CMUA, 0, "(A)?A", "llaab" },
245 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
246 { MUA, 0, "(a)?a", "manm" },
247 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
248 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
249 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
250
251 /* Greedy and non-greedy + operators */
252 { MUA, 0, "(aa)+aa", "aaaaaaa" },
253 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
254 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
255 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
256 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
258 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
259
260 /* Greedy and non-greedy * operators */
261 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
262 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
263 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
264 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
265 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
266 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
267 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
268 { MA, 0, "((?:a|)*){0}a", "a" },
269
270 /* Combining ? + * operators */
271 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
272 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
274 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
275 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
276
277 /* Single character iterators. */
278 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
279 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
280 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
281 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
282 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
283 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
284 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
285 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
286 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
287 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
288 { MUA, 0, "(a?+[^b])+", "babaacacb" },
289 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
290 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
291 { CMUA, 0, "[c-f]+k", "DemmFke" },
292 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
293 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
294 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
295 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
296 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
297 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
298 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
299 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
300 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
301 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
302 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
303 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
304 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
305 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
306 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
307 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
309 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
310
311 /* Bracket repeats with limit. */
312 { MUA, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
313 { MUA, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
314 { MUA, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
315 { MUA, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
316 { MUA, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
317 { MUA, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
318 { MUA, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
319 { MUA, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
320 { MUA, 0, "(ab){4,6}?M", "abababababababM" },
321
322 /* Basic character sets. */
323 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
324 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
325 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
326 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
327 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
328 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
329
330 /* Unicode properties. */
331 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
332 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
333 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
334 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
335 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
336 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
337 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
338 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
339 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
340 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
341 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
342 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
343 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
344 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
345 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
346 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
347 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
348 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
349 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
350 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
351
352 /* Possible empty brackets. */
353 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
354 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
355 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
356 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
357 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
358 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
359 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
360 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
361 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
362 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
363
364 /* Start offset. */
365 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
366 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
367 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
368 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
369
370 /* Newline. */
371 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
372 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
373 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
374
375 /* Any character except newline or any newline. */
376 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
377 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
378 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
379 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
380 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
381 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
382 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
383 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
384 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
385 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
386 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
387 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
388 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
389 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
390 { MUA, 0, "\\R+", "ab\r\n\r" },
391 { MUA, 0, "\\R*", "ab\r\n\r" },
392 { MUA, 0, "\\R*", "\r\n\r" },
393 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
394 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
395 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
396 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
397 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
398 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
399 { MUA, 0, "\\R*\\R\\R", "\n\r" },
400 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
401 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
402
403 /* Atomic groups (no fallback from "next" direction). */
404 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
405 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
406 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
407 "bababcdedefgheijijklmlmnop" },
408 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
409 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
410 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
411 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
412 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
413 { MUA, 0, "(?>x|)*$", "aaa" },
414 { MUA, 0, "(?>(x)|)*$", "aaa" },
415 { MUA, 0, "(?>x|())*$", "aaa" },
416 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
417 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
418 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
419 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
420 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
421 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
422 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
423 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
424 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
425 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
426 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
427 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
428 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
429 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
430 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
431 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
432 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
433 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
434 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
435 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
436 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
437 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
438 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
439 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
440 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
441 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
442
443 /* Possessive quantifiers. */
444 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
445 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
446 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
447 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
448 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
449 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
450 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
451 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
452 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
453 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
454 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
455 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
456 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
457 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
458 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
459 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
460 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
461 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
462 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
463 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
464 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
465 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
466 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
467 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
468 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
469 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
470 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
471 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
472 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
473 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
474 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
475 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
476 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
477 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
478 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
479
480 /* Back references. */
481 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
482 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
483 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
484 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
485 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
486 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
487 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
488 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
489 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
490 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
491 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
492 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
493 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
494 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
495 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
496 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
497 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
498 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
499 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
500 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
501 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
502 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
503 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
504 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
505 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
506 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
507 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
508 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
509 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
510 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
511 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
512 { MUA | PCRE_DUPNAMES, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
513 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
514 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
515 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
516 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
517 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
518 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
519 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
520
521 /* Assertions. */
522 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
523 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
524 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
525 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
526 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
527 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
528 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
529 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
530 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
531 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
532 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
533 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
534 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
535 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
536 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
537 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
538 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
539 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
540 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
541 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
542 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
543 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
544 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
545 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
546
547 /* Not empty, ACCEPT, FAIL */
548 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
549 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
550 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
551 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
552 { MUA, 0, "a(*ACCEPT)b", "ab" },
553 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
554 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
555 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
556 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
557 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
558 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
559 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
560 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
561 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
562 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
563 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
564 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
565 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
566 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
567
568 /* Conditional blocks. */
569 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
570 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
571 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
572 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
573 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
574 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
575 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
576 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
577 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
578 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
579 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
580 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
581 { MUA, 0, "(?(?=a)ab)", "a" },
582 { MUA, 0, "(?(?<!b)c)", "b" },
583 { MUA, 0, "(?(DEFINE)a(b))", "a" },
584 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
585 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
586 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
587 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
588 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
589 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
590 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
591 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
592 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
593 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
594 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
595 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
596 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
597 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
598 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
599 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
600 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
601 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
602 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
603 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
604
605 /* Set start of match. */
606 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
607 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
608 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
609 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
610 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
611
612 /* First line. */
613 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
614 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
615 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
616 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
617 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
618 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
619 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
620 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
621 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
622 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
623 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
624 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
625 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
626 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
627 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
628 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
629 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
630 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
631 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
632
633 /* Recurse. */
634 { MUA, 0, "(a)(?1)", "aa" },
635 { MUA, 0, "((a))(?1)", "aa" },
636 { MUA, 0, "(b|a)(?1)", "aa" },
637 { MUA, 0, "(b|(a))(?1)", "aa" },
638 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
639 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
640 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
641 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
642 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
643 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
644 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
645 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
646 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
647 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
648 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
649 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
650 { MUA, 0, "b|<(?R)*>", "<<b>" },
651 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
652 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
653 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
654 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
655 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
656 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
657 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
658 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
659 { MUA, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
660 { MUA, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
661 { MUA, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
662 { MUA, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
663
664 /* 16 bit specific tests. */
665 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
666 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
667 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
668 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
669 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
670 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
671 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
672 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
673 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
674 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
675 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
676 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
677 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
678 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
679 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
680 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
681 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
682 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
683 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
684 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
685 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
686 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
687 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
688 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
689 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
690 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
691 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
692 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
693 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
694 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
695 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
696
697 /* Partial matching. */
698 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
699 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
700 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
701 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
702 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
703 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
704 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
705 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
706
707 /* (*MARK) verb. */
708 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
709 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
710 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
711 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
712 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
713 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
714 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
715 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
716 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
717 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
718 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
719 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
720 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
721 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
722 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
723
724 /* (*COMMIT) verb. */
725 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
726 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
727 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
728 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
729 { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
730 { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
731
732 /* (*PRUNE) verb. */
733 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
734 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
735 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
736 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
737 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
738 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
739 { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
740 { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
741 { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
742 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
743 { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
744 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
745 { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
746 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
747 { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
748 { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
749 { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
750 { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
751 { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
752 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
753 { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
754 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
755 { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
756 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
757 { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
758 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
759 { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
760 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
761 { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
762 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
763
764 /* (*SKIP) verb. */
765 { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
766
767 /* (*THEN) verb. */
768 { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
769 { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
770 { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
771 { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
772 { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
773 { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
774 { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
775 { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
776 { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
777
778 /* Deep recursion. */
779 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
780 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
781 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
782
783 /* Deep recursion: Stack limit reached. */
784 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
785 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
786 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
787 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
788 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
789
790 { 0, 0, NULL, NULL }
791 };
792
static const unsigned char *tables(int mode)
{
	/* Gives the caller a heap-allocated copy of the library's default
	   character tables, so that valgrind is able to report invalid reads
	   and writes on them.

	   mode == 0: return the copy, creating it on the first call. NULL is
	              returned when the default tables cannot be obtained or
	              the allocation fails.
	   mode != 0: release the copy (if any) and return NULL. */

	/* The size of the tables is not available through pcre_fullinfo.
	   Since this is a test program, a hard-coded value is acceptable
	   at the moment. */
	enum { TABLES_LENGTH = 1088 };

	static unsigned char *heap_tables;
	const char *errorptr;
	int erroroffset;
	unsigned char *builtin_tables = NULL;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode != 0) {
		free(heap_tables);
		heap_tables = NULL;
		return NULL;
	}

	if (heap_tables != NULL)
		return heap_tables;

	/* Compile an empty pattern only to ask it where the default tables are. */
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &builtin_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &builtin_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &builtin_tables);
		pcre32_free(regex);
	}
#endif
	/* Shouldn't ever happen. */
	if (builtin_tables == NULL)
		return NULL;

	heap_tables = (unsigned char *)malloc(TABLES_LENGTH);
	if (heap_tables != NULL)
		memcpy(heap_tables, builtin_tables, TABLES_LENGTH);
	return heap_tables;
}
855
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the opaque argument is
   already the stack to use, so it is handed back unchanged. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *stack = (pcre_jit_stack *)arg;

	return stack;
}
#endif
862
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the opaque argument is
   already the stack to use, so it is handed back unchanged. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *stack = (pcre16_jit_stack *)arg;

	return stack;
}
#endif
869
#ifdef SUPPORT_PCRE32
/* JIT stack callback for the 32 bit library: the opaque argument is
   already the stack to use, so it is handed back unchanged. */
static pcre32_jit_stack* callback32(void *arg)
{
	pcre32_jit_stack *stack = (pcre32_jit_stack *)arg;

	return stack;
}
#endif
876
#ifdef SUPPORT_PCRE8
/* JIT stack shared by the 8 bit tests; NULL until first requested. */
static pcre_jit_stack *stack8;

/* Returns the shared 8 bit JIT stack, allocating it on first use. */
static pcre_jit_stack *getstack8(void)
{
	if (stack8 == NULL)
		stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
	return stack8;
}

/* With a non-NULL extra: registers callback8 on it, passing the shared
   stack as the callback argument. With NULL: frees the shared stack so
   that the next getstack8() call allocates a new one. */
static void setstack8(pcre_extra *extra)
{
	if (extra != NULL) {
		pcre_assign_jit_stack(extra, callback8, getstack8());
		return;
	}

	if (stack8 != NULL)
		pcre_jit_stack_free(stack8);
	stack8 = NULL;
}
#endif /* SUPPORT_PCRE8 */
899
#ifdef SUPPORT_PCRE16
/* JIT stack shared by the 16 bit tests; NULL until first requested. */
static pcre16_jit_stack *stack16;

/* Returns the shared 16 bit JIT stack, allocating it on first use. */
static pcre16_jit_stack *getstack16(void)
{
	if (stack16 == NULL)
		stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
	return stack16;
}

/* With a non-NULL extra: registers callback16 on it, passing the shared
   stack as the callback argument. With NULL: frees the shared stack so
   that the next getstack16() call allocates a new one. */
static void setstack16(pcre16_extra *extra)
{
	if (extra != NULL) {
		pcre16_assign_jit_stack(extra, callback16, getstack16());
		return;
	}

	if (stack16 != NULL)
		pcre16_jit_stack_free(stack16);
	stack16 = NULL;
}
#endif /* SUPPORT_PCRE16 */
922
#ifdef SUPPORT_PCRE32
/* JIT stack shared by the 32 bit tests; NULL until first requested. */
static pcre32_jit_stack *stack32;

/* Returns the shared 32 bit JIT stack, allocating it on first use. */
static pcre32_jit_stack *getstack32(void)
{
	if (stack32 == NULL)
		stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
	return stack32;
}

/* With a non-NULL extra: registers callback32 on it, passing the shared
   stack as the callback argument. With NULL: frees the shared stack so
   that the next getstack32() call allocates a new one. */
static void setstack32(pcre32_extra *extra)
{
	if (extra != NULL) {
		pcre32_assign_jit_stack(extra, callback32, getstack32());
		return;
	}

	if (stack32 != NULL)
		pcre32_jit_stack_free(stack32);
	stack32 = NULL;
}
#endif /* SUPPORT_PCRE32 */
945
946 #ifdef SUPPORT_PCRE16
947
948 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
949 {
950 unsigned char *iptr = (unsigned char*)input;
951 PCRE_UCHAR16 *optr = output;
952 unsigned int c;
953
954 if (max_length == 0)
955 return 0;
956
957 while (*iptr && max_length > 1) {
958 c = 0;
959 if (offsetmap)
960 *offsetmap++ = (int)(iptr - (unsigned char*)input);
961
962 if (!(*iptr & 0x80))
963 c = *iptr++;
964 else if (!(*iptr & 0x20)) {
965 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
966 iptr += 2;
967 } else if (!(*iptr & 0x10)) {
968 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
969 iptr += 3;
970 } else if (!(*iptr & 0x08)) {
971 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
972 iptr += 4;
973 }
974
975 if (c < 65536) {
976 *optr++ = c;
977 max_length--;
978 } else if (max_length <= 2) {
979 *optr = '\0';
980 return (int)(optr - output);
981 } else {
982 c -= 0x10000;
983 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
984 *optr++ = 0xdc00 | (c & 0x3ff);
985 max_length -= 2;
986 if (offsetmap)
987 offsetmap++;
988 }
989 }
990 if (offsetmap)
991 *offsetmap = (int)(iptr - (unsigned char*)input);
992 *optr = '\0';
993 return (int)(optr - output);
994 }
995
996 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
997 {
998 unsigned char *iptr = (unsigned char*)input;
999 PCRE_UCHAR16 *optr = output;
1000
1001 if (max_length == 0)
1002 return 0;
1003
1004 while (*iptr && max_length > 1) {
1005 *optr++ = *iptr++;
1006 max_length--;
1007 }
1008 *optr = '\0';
1009 return (int)(optr - output);
1010 }
1011
1012 #define REGTEST_MAX_LENGTH16 4096
1013 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
1014 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1015
1016 #endif /* SUPPORT_PCRE16 */
1017
1018 #ifdef SUPPORT_PCRE32
1019
1020 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
1021 {
1022 unsigned char *iptr = (unsigned char*)input;
1023 PCRE_UCHAR32 *optr = output;
1024 unsigned int c;
1025
1026 if (max_length == 0)
1027 return 0;
1028
1029 while (*iptr && max_length > 1) {
1030 c = 0;
1031 if (offsetmap)
1032 *offsetmap++ = (int)(iptr - (unsigned char*)input);
1033
1034 if (!(*iptr & 0x80))
1035 c = *iptr++;
1036 else if (!(*iptr & 0x20)) {
1037 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1038 iptr += 2;
1039 } else if (!(*iptr & 0x10)) {
1040 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1041 iptr += 3;
1042 } else if (!(*iptr & 0x08)) {
1043 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1044 iptr += 4;
1045 }
1046
1047 *optr++ = c;
1048 max_length--;
1049 }
1050 if (offsetmap)
1051 *offsetmap = (int)(iptr - (unsigned char*)input);
1052 *optr = 0;
1053 return (int)(optr - output);
1054 }
1055
1056 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
1057 {
1058 unsigned char *iptr = (unsigned char*)input;
1059 PCRE_UCHAR32 *optr = output;
1060
1061 if (max_length == 0)
1062 return 0;
1063
1064 while (*iptr && max_length > 1) {
1065 *optr++ = *iptr++;
1066 max_length--;
1067 }
1068 *optr = '\0';
1069 return (int)(optr - output);
1070 }
1071
1072 #define REGTEST_MAX_LENGTH32 4096
1073 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1074 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1075
1076 #endif /* SUPPORT_PCRE32 */
1077
/* Returns 1 when every byte of the NUL terminated string 'input' is in
   the 0..127 range (an empty string qualifies), 0 otherwise. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 0x80)
			return 0;
	}
	return 1;
}
1088
1089 static int regression_tests(void)
1090 {
1091 struct regression_test_case *current = regression_test_cases;
1092 const char *error;
1093 char *cpu_info;
1094 int i, err_offs;
1095 int is_successful, is_ascii_pattern, is_ascii_input;
1096 int total = 0;
1097 int successful = 0;
1098 int successful_row = 0;
1099 int counter = 0;
1100 int study_mode;
1101 int utf = 0, ucp = 0;
1102 int disabled_flags = 0;
1103 #ifdef SUPPORT_PCRE8
1104 pcre *re8;
1105 pcre_extra *extra8;
1106 pcre_extra dummy_extra8;
1107 int ovector8_1[32];
1108 int ovector8_2[32];
1109 int return_value8[2];
1110 unsigned char *mark8_1, *mark8_2;
1111 #endif
1112 #ifdef SUPPORT_PCRE16
1113 pcre16 *re16;
1114 pcre16_extra *extra16;
1115 pcre16_extra dummy_extra16;
1116 int ovector16_1[32];
1117 int ovector16_2[32];
1118 int return_value16[2];
1119 PCRE_UCHAR16 *mark16_1, *mark16_2;
1120 int length16;
1121 #endif
1122 #ifdef SUPPORT_PCRE32
1123 pcre32 *re32;
1124 pcre32_extra *extra32;
1125 pcre32_extra dummy_extra32;
1126 int ovector32_1[32];
1127 int ovector32_2[32];
1128 int return_value32[2];
1129 PCRE_UCHAR32 *mark32_1, *mark32_2;
1130 int length32;
1131 #endif
1132
1133 /* This test compares the behaviour of interpreter and JIT. Although disabling
1134 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1135 still considered successful from pcre_jit_test point of view. */
1136
1137 #if defined SUPPORT_PCRE8
1138 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1139 #elif defined SUPPORT_PCRE16
1140 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1141 #elif defined SUPPORT_PCRE32
1142 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1143 #endif
1144
1145 printf("Running JIT regression tests\n");
1146 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1147
1148 #if defined SUPPORT_PCRE8
1149 pcre_config(PCRE_CONFIG_UTF8, &utf);
1150 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1151 #elif defined SUPPORT_PCRE16
1152 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1153 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1154 #elif defined SUPPORT_PCRE16
1155 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1156 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1157 #endif
1158
1159 if (!utf)
1160 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1161 if (!ucp)
1162 disabled_flags |= PCRE_UCP;
1163 #ifdef SUPPORT_PCRE8
1164 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1165 #endif
1166 #ifdef SUPPORT_PCRE16
1167 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1168 #endif
1169 #ifdef SUPPORT_PCRE32
1170 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1171 #endif
1172
1173 while (current->pattern) {
1174 /* printf("\nPattern: %s :\n", current->pattern); */
1175 total++;
1176 if (current->start_offset & F_PROPERTY) {
1177 is_ascii_pattern = 0;
1178 is_ascii_input = 0;
1179 } else {
1180 is_ascii_pattern = check_ascii(current->pattern);
1181 is_ascii_input = check_ascii(current->input);
1182 }
1183
1184 if (current->flags & PCRE_PARTIAL_SOFT)
1185 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1186 else if (current->flags & PCRE_PARTIAL_HARD)
1187 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1188 else
1189 study_mode = PCRE_STUDY_JIT_COMPILE;
1190 error = NULL;
1191 #ifdef SUPPORT_PCRE8
1192 re8 = NULL;
1193 if (!(current->start_offset & F_NO8))
1194 re8 = pcre_compile(current->pattern,
1195 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1196 &error, &err_offs, tables(0));
1197
1198 extra8 = NULL;
1199 if (re8) {
1200 error = NULL;
1201 extra8 = pcre_study(re8, study_mode, &error);
1202 if (!extra8) {
1203 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1204 pcre_free(re8);
1205 re8 = NULL;
1206 }
1207 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1208 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1209 pcre_free_study(extra8);
1210 pcre_free(re8);
1211 re8 = NULL;
1212 }
1213 extra8->flags |= PCRE_EXTRA_MARK;
1214 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1215 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1216 #endif
1217 #ifdef SUPPORT_PCRE16
1218 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1219 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1220 else
1221 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1222
1223 re16 = NULL;
1224 if (!(current->start_offset & F_NO16))
1225 re16 = pcre16_compile(regtest_buf16,
1226 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1227 &error, &err_offs, tables(0));
1228
1229 extra16 = NULL;
1230 if (re16) {
1231 error = NULL;
1232 extra16 = pcre16_study(re16, study_mode, &error);
1233 if (!extra16) {
1234 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1235 pcre16_free(re16);
1236 re16 = NULL;
1237 }
1238 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1239 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1240 pcre16_free_study(extra16);
1241 pcre16_free(re16);
1242 re16 = NULL;
1243 }
1244 extra16->flags |= PCRE_EXTRA_MARK;
1245 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1246 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1247 #endif
1248 #ifdef SUPPORT_PCRE32
1249 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1250 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1251 else
1252 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1253
1254 re32 = NULL;
1255 if (!(current->start_offset & F_NO32))
1256 re32 = pcre32_compile(regtest_buf32,
1257 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1258 &error, &err_offs, tables(0));
1259
1260 extra32 = NULL;
1261 if (re32) {
1262 error = NULL;
1263 extra32 = pcre32_study(re32, study_mode, &error);
1264 if (!extra32) {
1265 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1266 pcre32_free(re32);
1267 re32 = NULL;
1268 }
1269 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1270 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1271 pcre32_free_study(extra32);
1272 pcre32_free(re32);
1273 re32 = NULL;
1274 }
1275 extra32->flags |= PCRE_EXTRA_MARK;
1276 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1277 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1278 #endif
1279
1280 counter++;
1281 if ((counter & 0x3) != 0) {
1282 #ifdef SUPPORT_PCRE8
1283 setstack8(NULL);
1284 #endif
1285 #ifdef SUPPORT_PCRE16
1286 setstack16(NULL);
1287 #endif
1288 #ifdef SUPPORT_PCRE32
1289 setstack32(NULL);
1290 #endif
1291 }
1292
1293 #ifdef SUPPORT_PCRE8
1294 return_value8[0] = -1000;
1295 return_value8[1] = -1000;
1296 for (i = 0; i < 32; ++i)
1297 ovector8_1[i] = -2;
1298 for (i = 0; i < 32; ++i)
1299 ovector8_2[i] = -2;
1300 if (re8) {
1301 mark8_1 = NULL;
1302 mark8_2 = NULL;
1303 extra8->mark = &mark8_1;
1304
1305 if ((counter & 0x1) != 0) {
1306 setstack8(extra8);
1307 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1308 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1309 } else
1310 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1311 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
1312 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1313 dummy_extra8.flags = PCRE_EXTRA_MARK;
1314 if (current->start_offset & F_STUDY) {
1315 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1316 dummy_extra8.study_data = extra8->study_data;
1317 }
1318 dummy_extra8.mark = &mark8_2;
1319 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1320 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1321 }
1322 #endif
1323
1324 #ifdef SUPPORT_PCRE16
1325 return_value16[0] = -1000;
1326 return_value16[1] = -1000;
1327 for (i = 0; i < 32; ++i)
1328 ovector16_1[i] = -2;
1329 for (i = 0; i < 32; ++i)
1330 ovector16_2[i] = -2;
1331 if (re16) {
1332 mark16_1 = NULL;
1333 mark16_2 = NULL;
1334 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1335 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1336 else
1337 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1338 extra16->mark = &mark16_1;
1339 if ((counter & 0x1) != 0) {
1340 setstack16(extra16);
1341 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1342 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1343 } else
1344 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1345 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
1346 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1347 dummy_extra16.flags = PCRE_EXTRA_MARK;
1348 if (current->start_offset & F_STUDY) {
1349 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1350 dummy_extra16.study_data = extra16->study_data;
1351 }
1352 dummy_extra16.mark = &mark16_2;
1353 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1354 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1355 }
1356 #endif
1357
1358 #ifdef SUPPORT_PCRE32
1359 return_value32[0] = -1000;
1360 return_value32[1] = -1000;
1361 for (i = 0; i < 32; ++i)
1362 ovector32_1[i] = -2;
1363 for (i = 0; i < 32; ++i)
1364 ovector32_2[i] = -2;
1365 if (re32) {
1366 mark32_1 = NULL;
1367 mark32_2 = NULL;
1368 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1369 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1370 else
1371 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1372 extra32->mark = &mark32_1;
1373 if ((counter & 0x1) != 0) {
1374 setstack32(extra32);
1375 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1376 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1377 } else
1378 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1379 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
1380 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1381 dummy_extra32.flags = PCRE_EXTRA_MARK;
1382 if (current->start_offset & F_STUDY) {
1383 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1384 dummy_extra32.study_data = extra32->study_data;
1385 }
1386 dummy_extra32.mark = &mark32_2;
1387 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1388 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1389 }
1390 #endif
1391
1392 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1393 return_value8[0], return_value16[0], return_value32[0],
1394 ovector8_1[0], ovector8_1[1],
1395 ovector16_1[0], ovector16_1[1],
1396 ovector32_1[0], ovector32_1[1],
1397 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1398
1399 /* If F_DIFF is set, just run the test, but do not compare the results.
1400 Segfaults can still be captured. */
1401
1402 is_successful = 1;
1403 if (!(current->start_offset & F_DIFF)) {
1404 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1405 if (!(current->start_offset & F_FORCECONV)) {
1406 int return_value;
1407
1408 /* All results must be the same. */
1409 #ifdef SUPPORT_PCRE8
1410 if ((return_value = return_value8[0]) != return_value8[1]) {
1411 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1412 return_value8[0], return_value8[1], total, current->pattern, current->input);
1413 is_successful = 0;
1414 } else
1415 #endif
1416 #ifdef SUPPORT_PCRE16
1417 if ((return_value = return_value16[0]) != return_value16[1]) {
1418 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1419 return_value16[0], return_value16[1], total, current->pattern, current->input);
1420 is_successful = 0;
1421 } else
1422 #endif
1423 #ifdef SUPPORT_PCRE32
1424 if ((return_value = return_value32[0]) != return_value32[1]) {
1425 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1426 return_value32[0], return_value32[1], total, current->pattern, current->input);
1427 is_successful = 0;
1428 } else
1429 #endif
1430 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1431 if (return_value8[0] != return_value16[0]) {
1432 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1433 return_value8[0], return_value16[0],
1434 total, current->pattern, current->input);
1435 is_successful = 0;
1436 } else
1437 #endif
1438 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1439 if (return_value8[0] != return_value32[0]) {
1440 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1441 return_value8[0], return_value32[0],
1442 total, current->pattern, current->input);
1443 is_successful = 0;
1444 } else
1445 #endif
1446 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1447 if (return_value16[0] != return_value32[0]) {
1448 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1449 return_value16[0], return_value32[0],
1450 total, current->pattern, current->input);
1451 is_successful = 0;
1452 } else
1453 #endif
1454 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1455 if (return_value == PCRE_ERROR_PARTIAL) {
1456 return_value = 2;
1457 } else {
1458 return_value *= 2;
1459 }
1460 #ifdef SUPPORT_PCRE8
1461 return_value8[0] = return_value;
1462 #endif
1463 #ifdef SUPPORT_PCRE16
1464 return_value16[0] = return_value;
1465 #endif
1466 #ifdef SUPPORT_PCRE32
1467 return_value32[0] = return_value;
1468 #endif
1469 /* Map the 16 and 32 bit ovector entries through their offset maps so they can be compared with the 8 bit ovectors below. */
1470 if (current->flags & PCRE_UTF8) {
1471 #ifdef SUPPORT_PCRE16
1472 for (i = 0; i < return_value; ++i) {
1473 if (ovector16_1[i] >= 0)
1474 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1475 if (ovector16_2[i] >= 0)
1476 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1477 }
1478 #endif
1479 #ifdef SUPPORT_PCRE32
1480 for (i = 0; i < return_value; ++i) {
1481 if (ovector32_1[i] >= 0)
1482 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1483 if (ovector32_2[i] >= 0)
1484 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1485 }
1486 #endif
1487 }
1488
1489 for (i = 0; i < return_value; ++i) {
1490 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1491 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1492 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1493 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1494 total, current->pattern, current->input);
1495 is_successful = 0;
1496 }
1497 #endif
1498 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1499 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1500 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1501 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1502 total, current->pattern, current->input);
1503 is_successful = 0;
1504 }
1505 #endif
1506 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1507 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1508 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1509 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1510 total, current->pattern, current->input);
1511 is_successful = 0;
1512 }
1513 #endif
1514 }
1515 }
1516 } else
1517 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1518 {
1519 /* No cross-width comparison here: for each enabled width (8, 16, 32 bit) only the two runs of that same width must give equal results. */
1520 #ifdef SUPPORT_PCRE8
1521 if (return_value8[0] != return_value8[1]) {
1522 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1523 return_value8[0], return_value8[1], total, current->pattern, current->input);
1524 is_successful = 0;
1525 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1526 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1527 return_value8[0] = 2;
1528 else
1529 return_value8[0] *= 2;
1530
1531 for (i = 0; i < return_value8[0]; ++i)
1532 if (ovector8_1[i] != ovector8_2[i]) {
1533 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1534 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1535 is_successful = 0;
1536 }
1537 }
1538 #endif
1539
1540 #ifdef SUPPORT_PCRE16
1541 if (return_value16[0] != return_value16[1]) {
1542 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1543 return_value16[0], return_value16[1], total, current->pattern, current->input);
1544 is_successful = 0;
1545 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1546 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1547 return_value16[0] = 2;
1548 else
1549 return_value16[0] *= 2;
1550
1551 for (i = 0; i < return_value16[0]; ++i)
1552 if (ovector16_1[i] != ovector16_2[i]) {
1553 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1554 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1555 is_successful = 0;
1556 }
1557 }
1558 #endif
1559
1560 #ifdef SUPPORT_PCRE32
1561 if (return_value32[0] != return_value32[1]) {
1562 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1563 return_value32[0], return_value32[1], total, current->pattern, current->input);
1564 is_successful = 0;
1565 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1566 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1567 return_value32[0] = 2;
1568 else
1569 return_value32[0] *= 2;
1570
1571 for (i = 0; i < return_value32[0]; ++i)
1572 if (ovector32_1[i] != ovector32_2[i]) {
1573 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1574 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1575 is_successful = 0;
1576 }
1577 }
1578 #endif
1579 }
1580 }
1581
1582 if (is_successful) {
1583 #ifdef SUPPORT_PCRE8
1584 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1585 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1586 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1587 total, current->pattern, current->input);
1588 is_successful = 0;
1589 }
1590
1591 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1592 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1593 total, current->pattern, current->input);
1594 is_successful = 0;
1595 }
1596 }
1597 #endif
1598 #ifdef SUPPORT_PCRE16
1599 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1600 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1601 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1602 total, current->pattern, current->input);
1603 is_successful = 0;
1604 }
1605
1606 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1607 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1608 total, current->pattern, current->input);
1609 is_successful = 0;
1610 }
1611 }
1612 #endif
1613 #ifdef SUPPORT_PCRE32
1614 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1615 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1616 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1617 total, current->pattern, current->input);
1618 is_successful = 0;
1619 }
1620
1621 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1622 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1623 total, current->pattern, current->input);
1624 is_successful = 0;
1625 }
1626 }
1627 #endif
1628 }
1629
1630 if (is_successful) {
1631 #ifdef SUPPORT_PCRE8
1632 if (mark8_1 != mark8_2) {
1633 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1634 total, current->pattern, current->input);
1635 is_successful = 0;
1636 }
1637 #endif
1638 #ifdef SUPPORT_PCRE16
1639 if (mark16_1 != mark16_2) {
1640 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1641 total, current->pattern, current->input);
1642 is_successful = 0;
1643 }
1644 #endif
1645 #ifdef SUPPORT_PCRE32
1646 if (mark32_1 != mark32_2) {
1647 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1648 total, current->pattern, current->input);
1649 is_successful = 0;
1650 }
1651 #endif
1652 }
1653
1654 #ifdef SUPPORT_PCRE8
1655 if (re8) {
1656 pcre_free_study(extra8);
1657 pcre_free(re8);
1658 }
1659 #endif
1660 #ifdef SUPPORT_PCRE16
1661 if (re16) {
1662 pcre16_free_study(extra16);
1663 pcre16_free(re16);
1664 }
1665 #endif
1666 #ifdef SUPPORT_PCRE32
1667 if (re32) {
1668 pcre32_free_study(extra32);
1669 pcre32_free(re32);
1670 }
1671 #endif
1672
1673 if (is_successful) {
1674 successful++;
1675 successful_row++;
1676 printf(".");
1677 if (successful_row >= 60) {
1678 successful_row = 0;
1679 printf("\n");
1680 }
1681 } else
1682 successful_row = 0;
1683
1684 fflush(stdout);
1685 current++;
1686 }
1687 tables(1);
1688 #ifdef SUPPORT_PCRE8
1689 setstack8(NULL);
1690 #endif
1691 #ifdef SUPPORT_PCRE16
1692 setstack16(NULL);
1693 #endif
1694 #ifdef SUPPORT_PCRE32
1695 setstack32(NULL);
1696 #endif
1697
1698 if (total == successful) {
1699 printf("\nAll JIT regression tests are successfully passed.\n");
1700 return 0;
1701 } else {
1702 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1703 return 1;
1704 }
1705 }
1706
1707 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5