/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory | Revision Log


Revision 1415 - (show annotations)
Sun Dec 22 20:47:08 2013 UTC (5 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 70510 byte(s)
The auto-possessification of character sets was improved. The JIT compiler also optimizes more character set checks.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
/* Marker bit 31 for the test harness.
   NOTE(review): its use is not visible in this part of the file; presumably
   it is OR'd into a test case's option word - confirm against the driver. */
#define PCRE_BUG 0x80000000
55
/*
 Letter characters:
   \xe6\x92\xad = 0x64ad = 25773 (kanji)
 Non-letter characters:
   \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
   \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
   \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
   \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
 Newlines:
   \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
   \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
 Othercase pairs:
   \xc3\xa9 = 0xe9 = 233 (e')
   \xc3\x89 = 0xc9 = 201 (E')
   \xc3\xa1 = 0xe1 = 225 (a')
   \xc3\x81 = 0xc1 = 193 (A')
   \xc8\xba = 0x23a = 570
   \xe2\xb1\xa5 = 0x2c65 = 11365
   \xe1\xbd\xb8 = 0x1f78 = 8056
   \xe1\xbf\xb8 = 0x1ff8 = 8184
   \xf0\x90\x90\x80 = 0x10400 = 66560
   \xf0\x90\x90\xa8 = 0x10428 = 66600
 Mark property:
   \xcc\x8d = 0x30d = 781
 Special:
   \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
   \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
   \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
   \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
   \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
*/
87
/* Runs every entry of the regression table; defined later in this file. */
static int regression_tests(void);

/*
 * Program entry point.
 *
 * Asks the PCRE library whether JIT support is available, using the first
 * configured variant in the order 8-bit, 16-bit, 32-bit. When JIT is not
 * reported as available, prints a message and returns 1. Otherwise returns
 * whatever regression_tests() returns.
 */
int main(void)
{
	/* Starts at 0 so the failure branch is taken unless the config query
	   stores a non-zero value here. */
	int jit = 0;

#if defined SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE16
	pcre16_config(PCRE_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE32
	pcre32_config(PCRE_CONFIG_JIT, &jit);
#endif

	if (!jit) {
		printf("JIT must be enabled to run pcre_jit_test\n");
		return 1;
	}

	return regression_tests();
}
106
107 /* --------------------------------------------------------------------------------------- */
108
109 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
110 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111 #endif
112
/*
 * PCRE option bundles used as the first initializer of each test table entry.
 * The letters in each name spell out the options it combines:
 *   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
 *   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
 */
#define MUA	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define MUAP	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define CMUAP	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)

/*
 * Values carried in the second initializer of each test table entry.
 * OFFSET_MASK covers the low 16 bits; every F_* bit lies above it, which is
 * why table entries can be written as e.g. "1 | F_NOMATCH".
 *
 * NOTE(review): the code that consumes these flags is outside this view, so
 * the per-flag meanings below are inferred from the names and from the table
 * entries that use them - confirm against the test driver.
 */
#define OFFSET_MASK	0x00ffff
#define F_NO8		0x010000	/* presumably: skip in 8-bit mode */
#define F_NO16		0x020000	/* presumably: skip in 16-bit mode */
/* NOTE(review): F_NO32 has the same value as F_NO16, so the two cannot be
   told apart at run time - confirm this is intentional. */
#define F_NO32		0x020000
#define F_NOMATCH	0x040000	/* presumably: the pattern must not match */
#define F_DIFF		0x080000
#define F_FORCECONV	0x100000
#define F_PROPERTY	0x200000	/* set on the \p, \P and \X entries of the table */
#define F_STUDY		0x400000
130
/*
 * One row of the regression test table.
 */
struct regression_test_case {
	/* PCRE option bits, e.g. MUA or PCRE_CASELESS. */
	int flags;
	/* Start offset, optionally OR'd with F_* bits (e.g. "1 | F_NOMATCH"). */
	int start_offset;
	/* Regular expression source text. */
	const char *pattern;
	/* Subject string the pattern is run against. */
	const char *input;
};
137
138 static struct regression_test_case regression_test_cases[] = {
139 /* Constant strings. */
140 { MUA, 0, "AbC", "AbAbC" },
141 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
142 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
143 { MA, 0, "[^a]", "aAbB" },
144 { CMA, 0, "[^m]", "mMnN" },
145 { MA, 0, "a[^b][^#]", "abacd" },
146 { CMA, 0, "A[^B][^E]", "abacd" },
147 { CMUA, 0, "[^x][^#]", "XxBll" },
148 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
149 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
150 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
151 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
152 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
153 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
154 { MUA, 0, "[axd]", "sAXd" },
155 { CMUA, 0, "[axd]", "sAXd" },
156 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
157 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
158 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
159 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
161 { MUA, 0, "[^a]", "\xc2\x80[]" },
162 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
163 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
164 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
165 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
166 { PCRE_CASELESS, 0, "a1", "Aa1" },
167 { MA, 0, "\\Ca", "cda" },
168 { CMA, 0, "\\Ca", "CDA" },
169 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
170 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
171 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
174 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
175
176 /* Assertions. */
177 { MUA, 0, "\\b[^A]", "A_B#" },
178 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
179 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
180 { MAP, 0, "\\B", "_\xa1" },
181 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
182 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
183 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
184 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
185 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
186 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
187 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
188 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
189 { MA, 1 | F_NOMATCH, "^", "\n" },
190 { 0, 0, "^ab", "ab" },
191 { 0, 0 | F_NOMATCH, "^ab", "aab" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
197 { 0, 0, "ab$", "ab" },
198 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
199 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
200 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
201 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
202 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
203 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
204 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
205 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
206 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
207 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
208 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
209 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
210 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
211 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
212 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
213 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
214 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
215 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
218 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
219 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
220 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
225 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
226 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
227 { MA, 0, "\\Aa", "aaa" },
228 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
229 { MA, 1, "\\Ga", "aaa" },
230 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
231 { MA, 0, "a\\z", "aaa" },
232 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
233
234 /* Brackets. */
235 { MUA, 0, "(ab|bb|cd)", "bacde" },
236 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
237 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
238 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
239 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
240 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
241
242 /* Greedy and non-greedy ? operators. */
243 { MUA, 0, "(?:a)?a", "laab" },
244 { CMUA, 0, "(A)?A", "llaab" },
245 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
246 { MUA, 0, "(a)?a", "manm" },
247 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
248 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
249 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
250
251 /* Greedy and non-greedy + operators */
252 { MUA, 0, "(aa)+aa", "aaaaaaa" },
253 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
254 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
255 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
256 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
258 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
259
260 /* Greedy and non-greedy * operators */
261 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
262 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
263 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
264 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
265 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
266 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
267 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
268 { MA, 0, "((?:a|)*){0}a", "a" },
269
270 /* Combining ? + * operators */
271 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
272 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
274 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
275 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
276
277 /* Single character iterators. */
278 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
279 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
280 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
281 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
282 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
283 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
284 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
285 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
286 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
287 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
288 { MUA, 0, "(a?+[^b])+", "babaacacb" },
289 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
290 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
291 { CMUA, 0, "[c-f]+k", "DemmFke" },
292 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
293 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
294 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
295 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
296 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
297 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
298 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
299 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
300 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
301 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
302 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
303 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
304 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
305 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
306 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
307 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
309 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
310
311 /* Bracket repeats with limit. */
312 { MUA, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
313 { MUA, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
314 { MUA, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
315 { MUA, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
316 { MUA, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
317 { MUA, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
318 { MUA, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
319 { MUA, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
320 { MUA, 0, "(ab){4,6}?M", "abababababababM" },
321
322 /* Basic character sets. */
323 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
324 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
325 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
326 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
327 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
328 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
329 { MUA, 0, "x[bcef]+", "xaxdxecbfg" },
330 { MUA, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
331 { MUA, 0, "x[^befg]+", "xbxexacdhg" },
332 { MUA, 0, "x[^bcdl]+", "xlxbxaekmd" },
333 { MUA, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
334 { MUA, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
335
336 /* Unicode properties. */
337 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
338 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
339 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
340 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
341 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
342 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
343 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
344 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
345 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
346 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
347 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
348 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
349 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
350 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
351 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
352 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
353 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
354 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
355 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
356 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
357
358 /* Possible empty brackets. */
359 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
360 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
361 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
362 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
363 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
364 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
365 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
366 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
367 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
368 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
369
370 /* Start offset. */
371 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
372 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
373 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
374 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
375
376 /* Newline. */
377 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
378 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
379 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
380
381 /* Any character except newline or any newline. */
382 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
383 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
384 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
385 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
386 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
387 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
388 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
389 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
390 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
391 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
392 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
393 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
394 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
395 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
396 { MUA, 0, "\\R+", "ab\r\n\r" },
397 { MUA, 0, "\\R*", "ab\r\n\r" },
398 { MUA, 0, "\\R*", "\r\n\r" },
399 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
400 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
401 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
402 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
403 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
404 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
405 { MUA, 0, "\\R*\\R\\R", "\n\r" },
406 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
407 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
408
409 /* Atomic groups (no fallback from "next" direction). */
410 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
411 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
412 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
413 "bababcdedefgheijijklmlmnop" },
414 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
415 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
416 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
417 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
418 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
419 { MUA, 0, "(?>x|)*$", "aaa" },
420 { MUA, 0, "(?>(x)|)*$", "aaa" },
421 { MUA, 0, "(?>x|())*$", "aaa" },
422 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
423 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
424 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
425 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
426 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
427 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
428 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
429 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
430 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
431 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
432 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
433 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
434 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
435 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
436 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
437 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
438 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
439 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
440 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
441 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
442 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
443 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
444 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
445 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
446 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
447 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
448
449 /* Possessive quantifiers. */
450 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
451 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
452 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
453 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
454 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
455 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
456 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
457 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
458 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
459 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
460 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
461 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
462 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
463 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
464 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
465 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
466 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
467 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
468 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
469 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
470 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
471 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
472 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
473 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
474 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
475 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
476 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
477 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
478 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
479 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
480 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
481 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
482 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
483 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
484 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
485
486 /* Back references. */
487 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
488 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
489 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
490 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
491 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
492 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
493 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
494 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
495 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
496 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
497 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
498 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
499 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
500 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
501 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
502 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
503 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
504 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
505 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
506 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
507 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
508 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
509 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
510 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
511 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
512 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
513 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
514 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
515 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
516 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
517 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
518 { MUA | PCRE_DUPNAMES, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
519 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
520 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
521 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
522 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
523 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
524 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
525 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
526
527 /* Assertions. */
528 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
529 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
530 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
531 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
532 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
533 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
534 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
535 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
536 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
537 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
538 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
539 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
540 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
541 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
542 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
543 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
544 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
545 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
546 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
547 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
548 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
549 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
550 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
551 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
552
553 /* Not empty, ACCEPT, FAIL */
554 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
555 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
556 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
557 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
558 { MUA, 0, "a(*ACCEPT)b", "ab" },
559 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
560 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
561 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
562 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
563 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
564 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
565 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
566 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
567 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
568 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
569 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
570 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
571 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
572 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
573
574 /* Conditional blocks. */
575 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
576 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
577 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
578 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
579 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
580 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
581 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
582 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
583 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
584 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
585 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
586 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
587 { MUA, 0, "(?(?=a)ab)", "a" },
588 { MUA, 0, "(?(?<!b)c)", "b" },
589 { MUA, 0, "(?(DEFINE)a(b))", "a" },
590 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
591 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
592 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
593 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
594 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
595 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
596 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
597 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
598 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
599 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
600 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
601 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
602 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
603 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
604 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
605 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
606 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
607 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
608 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
609 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
610
611 /* Set start of match. */
612 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
613 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
614 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
615 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
616 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
617
618 /* First line. */
619 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
620 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
621 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
622 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
623 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
624 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
625 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
626 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
627 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
628 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
629 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
630 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
631 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
632 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
633 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
634 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
635 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
636 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
637 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
638
639 /* Recurse. */
640 { MUA, 0, "(a)(?1)", "aa" },
641 { MUA, 0, "((a))(?1)", "aa" },
642 { MUA, 0, "(b|a)(?1)", "aa" },
643 { MUA, 0, "(b|(a))(?1)", "aa" },
644 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
645 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
646 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
647 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
648 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
649 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
650 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
651 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
652 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
653 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
654 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
655 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
656 { MUA, 0, "b|<(?R)*>", "<<b>" },
657 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
658 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
659 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
660 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
661 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
662 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
663 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
664 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
665 { MUA, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
666 { MUA, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
667 { MUA, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
668 { MUA, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
669
670 /* 16 bit specific tests. */
671 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
672 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
673 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
674 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
675 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
676 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
677 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
678 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
679 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
680 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
681 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
682 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
683 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
684 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
685 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
686 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
687 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
688 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
689 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
690 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
691 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
692 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
693 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
694 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
695 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
696 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
697 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
698 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
699 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
700 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
701 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
702
703 /* Partial matching. */
704 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
705 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
706 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
707 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
708 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
709 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
710 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
711 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
712
713 /* (*MARK) verb. */
714 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
715 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
716 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
717 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
718 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
719 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
720 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
721 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
722 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
723 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
724 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
725 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
726 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
727 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
728 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
729
730 /* (*COMMIT) verb. */
731 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
732 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
733 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
734 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
735 { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
736 { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
737
738 /* (*PRUNE) verb. */
739 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
740 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
741 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
742 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
743 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
744 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
745 { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
746 { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
747 { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
748 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
749 { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
750 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
751 { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
752 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
753 { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
754 { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
755 { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
756 { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
757 { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
758 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
759 { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
760 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
761 { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
762 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
763 { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
764 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
765 { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
766 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
767 { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
768 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
769
770 /* (*SKIP) verb. */
771 { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
772
773 /* (*THEN) verb. */
774 { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
775 { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
776 { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
777 { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
778 { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
779 { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
780 { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
781 { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
782 { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
783
784 /* Deep recursion. */
785 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
786 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
787 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
788
789 /* Deep recursion: Stack limit reached. */
790 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
791 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
792 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
793 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
794 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
795
796 { 0, 0, NULL, NULL }
797 };
798
/* Returns a private heap copy of PCRE's default character tables.

The tables are copied so that valgrind is able to report invalid reads and
writes made through the returned pointer.

mode == 0: returns the cached copy, creating it on the first call. NULL is
  returned when the default tables cannot be obtained or when the
  allocation fails.
mode != 0: releases the cached copy (if any) and returns NULL. */
static const unsigned char *tables(int mode)
{
	/* Number of bytes copied from the default tables. Unfortunately this
	value cannot be queried through pcre_fullinfo, so it is hard-coded here.
	Since this is a test program, this is acceptable at the moment. */
	enum { TABLES_COPY_SIZE = 1088 };
	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode) {
		/* free(NULL) is a no-op, so no guard is required. */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy)
		return tables_copy;

	/* Compile an empty pattern with the first available library flavour and
	ask it for the address of the default tables. */
	default_tables = NULL;
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre32_free(regex);
	}
#endif
	/* Shouldn't ever happen. */
	if (!default_tables)
		return NULL;

	tables_copy = (unsigned char *)malloc(TABLES_COPY_SIZE);
	if (!tables_copy)
		return NULL;

	memcpy(tables_copy, default_tables, TABLES_COPY_SIZE);
	return tables_copy;
}
861
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library. The opaque argument is the JIT
stack itself (setstack8 registers it that way), so it is handed back with
its real type. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *jit_stack = (pcre_jit_stack *)arg;

	return jit_stack;
}
#endif
868
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library. The opaque argument is the JIT
stack itself (setstack16 registers it that way), so it is handed back with
its real type. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *jit_stack = (pcre16_jit_stack *)arg;

	return jit_stack;
}
#endif
875
#ifdef SUPPORT_PCRE32
/* JIT stack callback for the 32 bit library. The opaque argument is the JIT
stack itself (setstack32 registers it that way), so it is handed back with
its real type. */
static pcre32_jit_stack* callback32(void *arg)
{
	pcre32_jit_stack *jit_stack = (pcre32_jit_stack *)arg;

	return jit_stack;
}
#endif
882
#ifdef SUPPORT_PCRE8
/* JIT stack shared by the 8 bit tests; NULL while none is allocated. */
static pcre_jit_stack *stack8;

/* Returns the shared 8 bit JIT stack, allocating it on first use. The
result of pcre_jit_stack_alloc is stored and returned as is, so a failed
allocation is visible to the caller as NULL. */
static pcre_jit_stack *getstack8(void)
{
	if (stack8 == NULL)
		stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
	return stack8;
}

/* extra != NULL: assigns the shared JIT stack to the given study data
  through callback8.
extra == NULL: releases the shared JIT stack, if there is one. */
static void setstack8(pcre_extra *extra)
{
	if (extra) {
		pcre_assign_jit_stack(extra, callback8, getstack8());
		return;
	}

	if (stack8 != NULL) {
		pcre_jit_stack_free(stack8);
		stack8 = NULL;
	}
}
#endif /* SUPPORT_PCRE8 */
905
#ifdef SUPPORT_PCRE16
/* JIT stack shared by the 16 bit tests; NULL while none is allocated. */
static pcre16_jit_stack *stack16;

/* Returns the shared 16 bit JIT stack, allocating it on first use. The
result of pcre16_jit_stack_alloc is stored and returned as is, so a failed
allocation is visible to the caller as NULL. */
static pcre16_jit_stack *getstack16(void)
{
	if (!stack16)
		stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
	return stack16;
}

/* extra != NULL: assigns the shared JIT stack to the given study data
  through callback16.
extra == NULL: releases the shared JIT stack, if there is one. */
static void setstack16(pcre16_extra *extra)
{
	if (!extra) {
		if (stack16)
			pcre16_jit_stack_free(stack16);
		stack16 = NULL;
		return;
	}

	pcre16_assign_jit_stack(extra, callback16, getstack16());
}
#endif /* SUPPORT_PCRE16 */
928
#ifdef SUPPORT_PCRE32
/* JIT stack shared by the 32 bit tests; NULL while none is allocated. */
static pcre32_jit_stack *stack32;

/* Returns the shared 32 bit JIT stack, allocating it on first use. The
result of pcre32_jit_stack_alloc is stored and returned as is, so a failed
allocation is visible to the caller as NULL. */
static pcre32_jit_stack *getstack32(void)
{
	if (!stack32)
		stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
	return stack32;
}

/* extra != NULL: assigns the shared JIT stack to the given study data
  through callback32.
extra == NULL: releases the shared JIT stack, if there is one. */
static void setstack32(pcre32_extra *extra)
{
	if (!extra) {
		if (stack32)
			pcre32_jit_stack_free(stack32);
		stack32 = NULL;
		return;
	}

	pcre32_assign_jit_stack(extra, callback32, getstack32());
}
#endif /* SUPPORT_PCRE32 */
951
952 #ifdef SUPPORT_PCRE16
953
/* Converts a zero terminated UTF-8 string to UTF-16.

The decoder is deliberately lenient: it only looks at the lead byte to find
the sequence length and does not reject overlong forms or UTF-8 encoded
surrogates. Conversion stops early (the remaining input is ignored) when
  - the output buffer is full,
  - a multi-byte sequence is cut short by the terminating zero, or
  - the lead byte is 0xf8..0xff, which starts no 1-4 byte sequence.
The last two checks keep the reader from running past the end of the input.

input       zero terminated UTF-8 text
output      receives the UTF-16 code units followed by a zero terminator
offsetmap   optional (may be NULL). Entry i receives the byte offset in
            input of the character that starts at output[i]; the entry that
            belongs to a low surrogate is skipped, not written. One final
            entry receives the offset at which conversion stopped.
max_length  capacity of output in code units, including the terminator

Returns the number of code units stored, not counting the terminator.
Nothing is written when max_length is 0. */
static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
{
	const unsigned char *start = (const unsigned char *)input;
	const unsigned char *iptr = start;
	PCRE_UCHAR16 *optr = output;
	unsigned int c;
	int seq_len, i;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		/* The lead byte decides how many bytes form this character. */
		if (!(*iptr & 0x80))
			seq_len = 1;
		else if (!(*iptr & 0x20))
			seq_len = 2;
		else if (!(*iptr & 0x10))
			seq_len = 3;
		else if (!(*iptr & 0x08))
			seq_len = 4;
		else
			break; /* Invalid lead byte: it could never be consumed. */

		/* Never read (or step) beyond the terminating zero. */
		for (i = 1; i < seq_len && iptr[i] != 0; i++)
			;
		if (i < seq_len)
			break;

		switch (seq_len) {
		case 1:
			c = iptr[0];
			break;
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		default:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		}

		/* A surrogate pair needs two code units plus the terminator. */
		if (c >= 65536 && max_length <= 2)
			break;

		if (offsetmap)
			*offsetmap++ = (int)(iptr - start);
		iptr += seq_len;

		if (c < 65536) {
			*optr++ = (PCRE_UCHAR16)c;
			max_length--;
		} else {
			c -= 0x10000;
			*optr++ = (PCRE_UCHAR16)(0xd800 | ((c >> 10) & 0x3ff));
			*optr++ = (PCRE_UCHAR16)(0xdc00 | (c & 0x3ff));
			max_length -= 2;
			/* Skip the map entry of the low surrogate. */
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - start);
	*optr = '\0';
	return (int)(optr - output);
}
1001
/* Widens a zero terminated 8 bit string to 16 bit code units, one unit per
input byte (bytes are taken as unsigned values, no decoding is done).

At most max_length - 1 units are copied and a zero terminator is appended.
Returns the number of units copied; nothing is written when max_length
is 0. */
static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
{
	const unsigned char *src = (const unsigned char *)input;
	int count = 0;

	if (max_length == 0)
		return 0;

	for (; src[count] != 0 && max_length - count > 1; count++)
		output[count] = src[count];

	output[count] = '\0';
	return count;
}
1017
/* Scratch storage of the 16 bit tests. regression_tests converts the pattern
and later the input string into regtest_buf16, and collects the byte offset
map of the converted input in regtest_offsetmap16. REGTEST_MAX_LENGTH16 is
the capacity passed to the conversion functions. */
1018 #define REGTEST_MAX_LENGTH16 4096
1019 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
1020 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1021
1022 #endif /* SUPPORT_PCRE16 */
1023
1024 #ifdef SUPPORT_PCRE32
1025
/* Converts a zero terminated UTF-8 string to UTF-32.

The decoder is deliberately lenient: it only looks at the lead byte to find
the sequence length and does not reject overlong forms or UTF-8 encoded
surrogates. Conversion stops early (the remaining input is ignored) when
  - the output buffer is full,
  - a multi-byte sequence is cut short by the terminating zero, or
  - the lead byte is 0xf8..0xff, which starts no 1-4 byte sequence.
The last two checks keep the reader from running past the end of the input.

input       zero terminated UTF-8 text
output      receives the code points followed by a zero terminator
offsetmap   optional (may be NULL). Entry i receives the byte offset in
            input of the character stored at output[i]; one final entry
            receives the offset at which conversion stopped.
max_length  capacity of output in code units, including the terminator

Returns the number of code units stored, not counting the terminator.
Nothing is written when max_length is 0. */
static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
{
	const unsigned char *start = (const unsigned char *)input;
	const unsigned char *iptr = start;
	PCRE_UCHAR32 *optr = output;
	unsigned int c;
	int seq_len, i;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		/* The lead byte decides how many bytes form this character. */
		if (!(*iptr & 0x80))
			seq_len = 1;
		else if (!(*iptr & 0x20))
			seq_len = 2;
		else if (!(*iptr & 0x10))
			seq_len = 3;
		else if (!(*iptr & 0x08))
			seq_len = 4;
		else
			break; /* Invalid lead byte: it could never be consumed. */

		/* Never read (or step) beyond the terminating zero. */
		for (i = 1; i < seq_len && iptr[i] != 0; i++)
			;
		if (i < seq_len)
			break;

		switch (seq_len) {
		case 1:
			c = iptr[0];
			break;
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		default:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		}

		if (offsetmap)
			*offsetmap++ = (int)(iptr - start);
		iptr += seq_len;

		*optr++ = c;
		max_length--;
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - start);
	*optr = 0;
	return (int)(optr - output);
}
1061
/* Widens a zero terminated 8 bit string to 32 bit code units, one unit per
input byte (bytes are taken as unsigned values, no decoding is done).

At most max_length - 1 units are copied and a zero terminator is appended.
Returns the number of units copied; nothing is written when max_length
is 0. */
static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
{
	const unsigned char *src = (const unsigned char *)input;
	int count = 0;

	if (max_length == 0)
		return 0;

	for (; src[count] != 0 && max_length - count > 1; count++)
		output[count] = src[count];

	output[count] = '\0';
	return count;
}
1077
/* Scratch storage of the 32 bit tests. regression_tests converts the pattern
and later the input string into regtest_buf32, and collects the byte offset
map of the converted input in regtest_offsetmap32. REGTEST_MAX_LENGTH32 is
the capacity passed to the conversion functions. */
1078 #define REGTEST_MAX_LENGTH32 4096
1079 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1080 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1081
1082 #endif /* SUPPORT_PCRE32 */
1083
/* Returns 1 when every byte of the zero terminated string is in the 7 bit
ASCII range (0..127), and 0 as soon as a larger byte is found. An empty
string yields 1. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 128)
			return 0;
	}
	return 1;
}
1094
1095 static int regression_tests(void)
1096 {
1097 struct regression_test_case *current = regression_test_cases;
1098 const char *error;
1099 char *cpu_info;
1100 int i, err_offs;
1101 int is_successful, is_ascii_pattern, is_ascii_input;
1102 int total = 0;
1103 int successful = 0;
1104 int successful_row = 0;
1105 int counter = 0;
1106 int study_mode;
1107 int utf = 0, ucp = 0;
1108 int disabled_flags = 0;
1109 #ifdef SUPPORT_PCRE8
1110 pcre *re8;
1111 pcre_extra *extra8;
1112 pcre_extra dummy_extra8;
1113 int ovector8_1[32];
1114 int ovector8_2[32];
1115 int return_value8[2];
1116 unsigned char *mark8_1, *mark8_2;
1117 #endif
1118 #ifdef SUPPORT_PCRE16
1119 pcre16 *re16;
1120 pcre16_extra *extra16;
1121 pcre16_extra dummy_extra16;
1122 int ovector16_1[32];
1123 int ovector16_2[32];
1124 int return_value16[2];
1125 PCRE_UCHAR16 *mark16_1, *mark16_2;
1126 int length16;
1127 #endif
1128 #ifdef SUPPORT_PCRE32
1129 pcre32 *re32;
1130 pcre32_extra *extra32;
1131 pcre32_extra dummy_extra32;
1132 int ovector32_1[32];
1133 int ovector32_2[32];
1134 int return_value32[2];
1135 PCRE_UCHAR32 *mark32_1, *mark32_2;
1136 int length32;
1137 #endif
1138
1139 /* This test compares the behaviour of interpreter and JIT. Although disabling
1140 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1141 still considered successful from pcre_jit_test point of view. */
1142
1143 #if defined SUPPORT_PCRE8
1144 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1145 #elif defined SUPPORT_PCRE16
1146 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1147 #elif defined SUPPORT_PCRE32
1148 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1149 #endif
1150
1151 printf("Running JIT regression tests\n");
1152 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1153
1154 #if defined SUPPORT_PCRE8
1155 pcre_config(PCRE_CONFIG_UTF8, &utf);
1156 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1157 #elif defined SUPPORT_PCRE16
1158 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1159 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1160 #elif defined SUPPORT_PCRE16
1161 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1162 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1163 #endif
1164
1165 if (!utf)
1166 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1167 if (!ucp)
1168 disabled_flags |= PCRE_UCP;
1169 #ifdef SUPPORT_PCRE8
1170 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1171 #endif
1172 #ifdef SUPPORT_PCRE16
1173 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1174 #endif
1175 #ifdef SUPPORT_PCRE32
1176 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1177 #endif
1178
1179 while (current->pattern) {
1180 /* printf("\nPattern: %s :\n", current->pattern); */
1181 total++;
1182 if (current->start_offset & F_PROPERTY) {
1183 is_ascii_pattern = 0;
1184 is_ascii_input = 0;
1185 } else {
1186 is_ascii_pattern = check_ascii(current->pattern);
1187 is_ascii_input = check_ascii(current->input);
1188 }
1189
1190 if (current->flags & PCRE_PARTIAL_SOFT)
1191 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1192 else if (current->flags & PCRE_PARTIAL_HARD)
1193 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1194 else
1195 study_mode = PCRE_STUDY_JIT_COMPILE;
1196 error = NULL;
1197 #ifdef SUPPORT_PCRE8
1198 re8 = NULL;
1199 if (!(current->start_offset & F_NO8))
1200 re8 = pcre_compile(current->pattern,
1201 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1202 &error, &err_offs, tables(0));
1203
1204 extra8 = NULL;
1205 if (re8) {
1206 error = NULL;
1207 extra8 = pcre_study(re8, study_mode, &error);
1208 if (!extra8) {
1209 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1210 pcre_free(re8);
1211 re8 = NULL;
1212 }
1213 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1214 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1215 pcre_free_study(extra8);
1216 pcre_free(re8);
1217 re8 = NULL;
1218 }
1219 extra8->flags |= PCRE_EXTRA_MARK;
1220 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1221 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1222 #endif
1223 #ifdef SUPPORT_PCRE16
1224 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1225 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1226 else
1227 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1228
1229 re16 = NULL;
1230 if (!(current->start_offset & F_NO16))
1231 re16 = pcre16_compile(regtest_buf16,
1232 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1233 &error, &err_offs, tables(0));
1234
1235 extra16 = NULL;
1236 if (re16) {
1237 error = NULL;
1238 extra16 = pcre16_study(re16, study_mode, &error);
1239 if (!extra16) {
1240 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1241 pcre16_free(re16);
1242 re16 = NULL;
1243 }
1244 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1245 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1246 pcre16_free_study(extra16);
1247 pcre16_free(re16);
1248 re16 = NULL;
1249 }
1250 extra16->flags |= PCRE_EXTRA_MARK;
1251 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1252 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1253 #endif
1254 #ifdef SUPPORT_PCRE32
1255 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1256 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1257 else
1258 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1259
1260 re32 = NULL;
1261 if (!(current->start_offset & F_NO32))
1262 re32 = pcre32_compile(regtest_buf32,
1263 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1264 &error, &err_offs, tables(0));
1265
1266 extra32 = NULL;
1267 if (re32) {
1268 error = NULL;
1269 extra32 = pcre32_study(re32, study_mode, &error);
1270 if (!extra32) {
1271 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1272 pcre32_free(re32);
1273 re32 = NULL;
1274 }
1275 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1276 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1277 pcre32_free_study(extra32);
1278 pcre32_free(re32);
1279 re32 = NULL;
1280 }
1281 extra32->flags |= PCRE_EXTRA_MARK;
1282 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1283 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1284 #endif
1285
1286 counter++;
1287 if ((counter & 0x3) != 0) {
1288 #ifdef SUPPORT_PCRE8
1289 setstack8(NULL);
1290 #endif
1291 #ifdef SUPPORT_PCRE16
1292 setstack16(NULL);
1293 #endif
1294 #ifdef SUPPORT_PCRE32
1295 setstack32(NULL);
1296 #endif
1297 }
1298
1299 #ifdef SUPPORT_PCRE8
1300 return_value8[0] = -1000;
1301 return_value8[1] = -1000;
1302 for (i = 0; i < 32; ++i)
1303 ovector8_1[i] = -2;
1304 for (i = 0; i < 32; ++i)
1305 ovector8_2[i] = -2;
1306 if (re8) {
1307 mark8_1 = NULL;
1308 mark8_2 = NULL;
1309 extra8->mark = &mark8_1;
1310
1311 if ((counter & 0x1) != 0) {
1312 setstack8(extra8);
1313 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1314 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1315 } else
1316 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1317 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
1318 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1319 dummy_extra8.flags = PCRE_EXTRA_MARK;
1320 if (current->start_offset & F_STUDY) {
1321 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1322 dummy_extra8.study_data = extra8->study_data;
1323 }
1324 dummy_extra8.mark = &mark8_2;
1325 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1326 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1327 }
1328 #endif
1329
1330 #ifdef SUPPORT_PCRE16
1331 return_value16[0] = -1000;
1332 return_value16[1] = -1000;
1333 for (i = 0; i < 32; ++i)
1334 ovector16_1[i] = -2;
1335 for (i = 0; i < 32; ++i)
1336 ovector16_2[i] = -2;
1337 if (re16) {
1338 mark16_1 = NULL;
1339 mark16_2 = NULL;
1340 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1341 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1342 else
1343 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1344 extra16->mark = &mark16_1;
1345 if ((counter & 0x1) != 0) {
1346 setstack16(extra16);
1347 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1348 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1349 } else
1350 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1351 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
1352 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1353 dummy_extra16.flags = PCRE_EXTRA_MARK;
1354 if (current->start_offset & F_STUDY) {
1355 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1356 dummy_extra16.study_data = extra16->study_data;
1357 }
1358 dummy_extra16.mark = &mark16_2;
1359 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1360 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1361 }
1362 #endif
1363
1364 #ifdef SUPPORT_PCRE32
1365 return_value32[0] = -1000;
1366 return_value32[1] = -1000;
1367 for (i = 0; i < 32; ++i)
1368 ovector32_1[i] = -2;
1369 for (i = 0; i < 32; ++i)
1370 ovector32_2[i] = -2;
1371 if (re32) {
1372 mark32_1 = NULL;
1373 mark32_2 = NULL;
1374 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1375 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1376 else
1377 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1378 extra32->mark = &mark32_1;
1379 if ((counter & 0x1) != 0) {
1380 setstack32(extra32);
1381 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1382 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1383 } else
1384 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1385 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
1386 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1387 dummy_extra32.flags = PCRE_EXTRA_MARK;
1388 if (current->start_offset & F_STUDY) {
1389 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1390 dummy_extra32.study_data = extra32->study_data;
1391 }
1392 dummy_extra32.mark = &mark32_2;
1393 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1394 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1395 }
1396 #endif
1397
1398 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1399 return_value8[0], return_value16[0], return_value32[0],
1400 ovector8_1[0], ovector8_1[1],
1401 ovector16_1[0], ovector16_1[1],
1402 ovector32_1[0], ovector32_1[1],
1403 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1404
1405 /* If F_DIFF is set, just run the test, but do not compare the results.
1406 Segfaults can still be captured. */
1407
1408 is_successful = 1;
1409 if (!(current->start_offset & F_DIFF)) {
1410 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1411 if (!(current->start_offset & F_FORCECONV)) {
1412 int return_value;
1413
1414 /* All results must be the same. */
1415 #ifdef SUPPORT_PCRE8
1416 if ((return_value = return_value8[0]) != return_value8[1]) {
1417 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1418 return_value8[0], return_value8[1], total, current->pattern, current->input);
1419 is_successful = 0;
1420 } else
1421 #endif
1422 #ifdef SUPPORT_PCRE16
1423 if ((return_value = return_value16[0]) != return_value16[1]) {
1424 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1425 return_value16[0], return_value16[1], total, current->pattern, current->input);
1426 is_successful = 0;
1427 } else
1428 #endif
1429 #ifdef SUPPORT_PCRE32
1430 if ((return_value = return_value32[0]) != return_value32[1]) {
1431 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1432 return_value32[0], return_value32[1], total, current->pattern, current->input);
1433 is_successful = 0;
1434 } else
1435 #endif
1436 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1437 if (return_value8[0] != return_value16[0]) {
1438 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1439 return_value8[0], return_value16[0],
1440 total, current->pattern, current->input);
1441 is_successful = 0;
1442 } else
1443 #endif
1444 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1445 if (return_value8[0] != return_value32[0]) {
1446 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1447 return_value8[0], return_value32[0],
1448 total, current->pattern, current->input);
1449 is_successful = 0;
1450 } else
1451 #endif
1452 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1453 if (return_value16[0] != return_value32[0]) {
1454 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1455 return_value16[0], return_value32[0],
1456 total, current->pattern, current->input);
1457 is_successful = 0;
1458 } else
1459 #endif
1460 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1461 if (return_value == PCRE_ERROR_PARTIAL) {
1462 return_value = 2;
1463 } else {
1464 return_value *= 2;
1465 }
1466 #ifdef SUPPORT_PCRE8
1467 return_value8[0] = return_value;
1468 #endif
1469 #ifdef SUPPORT_PCRE16
1470 return_value16[0] = return_value;
1471 #endif
1472 #ifdef SUPPORT_PCRE32
1473 return_value32[0] = return_value;
1474 #endif
1475 /* Map the 16 and 32 bit offsets through their offset maps, so that they can be compared with the 8 bit offsets below. */
1476 if (current->flags & PCRE_UTF8) {
1477 #ifdef SUPPORT_PCRE16
1478 for (i = 0; i < return_value; ++i) {
1479 if (ovector16_1[i] >= 0)
1480 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1481 if (ovector16_2[i] >= 0)
1482 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1483 }
1484 #endif
1485 #ifdef SUPPORT_PCRE32
1486 for (i = 0; i < return_value; ++i) {
1487 if (ovector32_1[i] >= 0)
1488 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1489 if (ovector32_2[i] >= 0)
1490 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1491 }
1492 #endif
1493 }
1494
1495 for (i = 0; i < return_value; ++i) {
1496 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1497 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1498 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1499 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1500 total, current->pattern, current->input);
1501 is_successful = 0;
1502 }
1503 #endif
1504 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1505 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1506 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1507 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1508 total, current->pattern, current->input);
1509 is_successful = 0;
1510 }
1511 #endif
1512 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1513 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1514 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1515 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1516 total, current->pattern, current->input);
1517 is_successful = 0;
1518 }
1519 #endif
1520 }
1521 }
1522 } else
1523 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1524 {
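1525 /* Each enabled width (8, 16, 32 bit) is checked on its own: its two runs must give equal results. Different widths are not compared with each other here. */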
1526 #ifdef SUPPORT_PCRE8
1527 if (return_value8[0] != return_value8[1]) {
1528 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1529 return_value8[0], return_value8[1], total, current->pattern, current->input);
1530 is_successful = 0;
1531 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1532 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1533 return_value8[0] = 2;
1534 else
1535 return_value8[0] *= 2;
1536
1537 for (i = 0; i < return_value8[0]; ++i)
1538 if (ovector8_1[i] != ovector8_2[i]) {
1539 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1540 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1541 is_successful = 0;
1542 }
1543 }
1544 #endif
1545
1546 #ifdef SUPPORT_PCRE16
1547 if (return_value16[0] != return_value16[1]) {
1548 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1549 return_value16[0], return_value16[1], total, current->pattern, current->input);
1550 is_successful = 0;
1551 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1552 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1553 return_value16[0] = 2;
1554 else
1555 return_value16[0] *= 2;
1556
1557 for (i = 0; i < return_value16[0]; ++i)
1558 if (ovector16_1[i] != ovector16_2[i]) {
1559 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1560 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1561 is_successful = 0;
1562 }
1563 }
1564 #endif
1565
1566 #ifdef SUPPORT_PCRE32
1567 if (return_value32[0] != return_value32[1]) {
1568 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1569 return_value32[0], return_value32[1], total, current->pattern, current->input);
1570 is_successful = 0;
1571 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1572 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1573 return_value32[0] = 2;
1574 else
1575 return_value32[0] *= 2;
1576
1577 for (i = 0; i < return_value32[0]; ++i)
1578 if (ovector32_1[i] != ovector32_2[i]) {
1579 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1580 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1581 is_successful = 0;
1582 }
1583 }
1584 #endif
1585 }
1586 }
1587
1588 if (is_successful) {
1589 #ifdef SUPPORT_PCRE8
1590 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1591 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1592 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1593 total, current->pattern, current->input);
1594 is_successful = 0;
1595 }
1596
1597 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1598 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1599 total, current->pattern, current->input);
1600 is_successful = 0;
1601 }
1602 }
1603 #endif
1604 #ifdef SUPPORT_PCRE16
1605 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1606 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1607 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1608 total, current->pattern, current->input);
1609 is_successful = 0;
1610 }
1611
1612 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1613 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1614 total, current->pattern, current->input);
1615 is_successful = 0;
1616 }
1617 }
1618 #endif
1619 #ifdef SUPPORT_PCRE32
1620 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1621 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1622 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1623 total, current->pattern, current->input);
1624 is_successful = 0;
1625 }
1626
1627 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1628 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1629 total, current->pattern, current->input);
1630 is_successful = 0;
1631 }
1632 }
1633 #endif
1634 }
1635
1636 if (is_successful) {
1637 #ifdef SUPPORT_PCRE8
1638 if (mark8_1 != mark8_2) {
1639 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1640 total, current->pattern, current->input);
1641 is_successful = 0;
1642 }
1643 #endif
1644 #ifdef SUPPORT_PCRE16
1645 if (mark16_1 != mark16_2) {
1646 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1647 total, current->pattern, current->input);
1648 is_successful = 0;
1649 }
1650 #endif
1651 #ifdef SUPPORT_PCRE32
1652 if (mark32_1 != mark32_2) {
1653 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1654 total, current->pattern, current->input);
1655 is_successful = 0;
1656 }
1657 #endif
1658 }
1659
1660 #ifdef SUPPORT_PCRE8
1661 if (re8) {
1662 pcre_free_study(extra8);
1663 pcre_free(re8);
1664 }
1665 #endif
1666 #ifdef SUPPORT_PCRE16
1667 if (re16) {
1668 pcre16_free_study(extra16);
1669 pcre16_free(re16);
1670 }
1671 #endif
1672 #ifdef SUPPORT_PCRE32
1673 if (re32) {
1674 pcre32_free_study(extra32);
1675 pcre32_free(re32);
1676 }
1677 #endif
1678
1679 if (is_successful) {
1680 successful++;
1681 successful_row++;
1682 printf(".");
1683 if (successful_row >= 60) {
1684 successful_row = 0;
1685 printf("\n");
1686 }
1687 } else
1688 successful_row = 0;
1689
1690 fflush(stdout);
1691 current++;
1692 }
1693 tables(1);
1694 #ifdef SUPPORT_PCRE8
1695 setstack8(NULL);
1696 #endif
1697 #ifdef SUPPORT_PCRE16
1698 setstack16(NULL);
1699 #endif
1700 #ifdef SUPPORT_PCRE32
1701 setstack32(NULL);
1702 #endif
1703
1704 if (total == successful) {
1705 printf("\nAll JIT regression tests are successfully passed.\n");
1706 return 0;
1707 } else {
1708 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1709 return 1;
1710 }
1711 }
1712
1713 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5