/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1282 - (show annotations)
Fri Mar 15 08:01:41 2013 UTC (6 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 67825 byte(s)
Fix a crash and an invalid return value in JIT when *THEN verb is used.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
54 #define PCRE_BUG 0x80000000
55
56 /*
57 Letter characters:
58 \xe6\x92\xad = 0x64ad = 25773 (kanji)
59 Non-letter characters:
60 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
61 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64 Newlines:
65 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
66 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67 Othercase pairs:
68 \xc3\xa9 = 0xe9 = 233 (e')
69 \xc3\x89 = 0xc9 = 201 (E')
70 \xc3\xa1 = 0xe1 = 225 (a')
71 \xc3\x81 = 0xc1 = 193 (A')
72 \xc8\xba = 0x23a = 570
73 \xe2\xb1\xa5 = 0x2c65 = 11365
74 \xe1\xbd\xb8 = 0x1f78 = 8056
75 \xe1\xbf\xb8 = 0x1ff8 = 8184
76 \xf0\x90\x90\x80 = 0x10400 = 66560
77 \xf0\x90\x90\xa8 = 0x10428 = 66600
78 Mark property:
79 \xcc\x8d = 0x30d = 781
80 Special:
81 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86 */
87
88 static int regression_tests(void);
89
/* Program entry point. Asks the PCRE library (8, 16 or 32 bit flavour,
   whichever is the first one compiled in) whether JIT support is available.
   Returns the result of regression_tests() when it is; otherwise prints a
   diagnostic and returns 1. */
int main(void)
{
	int jit_available = 0;

#if defined SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE16
	pcre16_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE32
	pcre32_config(PCRE_CONFIG_JIT, &jit_available);
#endif

	/* The flag stays 0 when the config call did not set it. */
	if (jit_available != 0)
		return regression_tests();

	printf("JIT must be enabled to run pcre_jit_test\n");
	return 1;
}
106
107 /* --------------------------------------------------------------------------------------- */
108
109 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
110 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111 #endif
112
113 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF) /* Option shorthands for the test table. Letters: C = CASELESS, M = MULTILINE, U = UTF8, A = NEWLINE_ANYCRLF, P = UCP. */
114 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP) /* MUA plus PCRE_UCP. */
115 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF) /* MUA plus PCRE_CASELESS. */
116 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP) /* MUA plus PCRE_CASELESS and PCRE_UCP. */
117 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF) /* Same as MUA but without PCRE_UTF8. */
118 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP) /* MA plus PCRE_UCP. */
119 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF) /* MA plus PCRE_CASELESS. */
120
121 #define OFFSET_MASK 0x00ffff /* Low 16 bits. The F_* values below start at bit 16; table rows OR them into the start_offset field, so this presumably extracts the plain offset - the consumer is not in this part of the file, confirm there. */
122 #define F_NO8 0x010000 /* Presumably excludes a case from the 8 bit run - TODO confirm against the test driver. */
123 #define F_NO16 0x020000 /* Presumably excludes a case from the 16 bit run - TODO confirm against the test driver. */
124 #define F_NO32 0x020000 /* NOTE(review): same value as F_NO16, so the two cannot be told apart - confirm this sharing is intentional. */
125 #define F_NOMATCH 0x040000 /* Set on table rows such as "^ab" against "aab"; presumably marks cases expected not to match. */
126 #define F_DIFF 0x080000 /* Meaning not visible in this part of the file - see the test driver. */
127 #define F_FORCECONV 0x100000 /* Set on the rows listed under "16 bit specific tests". */
128 #define F_PROPERTY 0x200000 /* Set on rows whose pattern uses \p, \P or \X. */
129 #define F_STUDY 0x400000 /* Not used by any table row visible here - see the test driver. */
130
131 struct regression_test_case { /* One row of regression_test_cases[]; rows are written as positional initializers, so the field order must not change. */
132 int flags; /* First initializer of each row: PCRE_* option bits, often via the MUA/CMA/... shorthands. */
133 int start_offset; /* Second initializer: a small offset value with F_* flags (e.g. F_NOMATCH) OR-ed in. */
134 const char *pattern; /* Third initializer: the regular expression text. */
135 const char *input; /* Fourth initializer: presumably the subject string the pattern is run against - the consumer is not visible here. */
136 };
137
138 static struct regression_test_case regression_test_cases[] = {
139 /* Constant strings. */
140 { MUA, 0, "AbC", "AbAbC" },
141 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
142 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
143 { MA, 0, "[^a]", "aAbB" },
144 { CMA, 0, "[^m]", "mMnN" },
145 { MA, 0, "a[^b][^#]", "abacd" },
146 { CMA, 0, "A[^B][^E]", "abacd" },
147 { CMUA, 0, "[^x][^#]", "XxBll" },
148 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
149 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
150 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
151 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
152 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
153 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
154 { MUA, 0, "[axd]", "sAXd" },
155 { CMUA, 0, "[axd]", "sAXd" },
156 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
157 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
158 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
159 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
161 { MUA, 0, "[^a]", "\xc2\x80[]" },
162 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
163 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
164 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
165 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
166 { PCRE_CASELESS, 0, "a1", "Aa1" },
167 { MA, 0, "\\Ca", "cda" },
168 { CMA, 0, "\\Ca", "CDA" },
169 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
170 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
171 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
174 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
175
176 /* Assertions. */
177 { MUA, 0, "\\b[^A]", "A_B#" },
178 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
179 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
180 { MAP, 0, "\\B", "_\xa1" },
181 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
182 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
183 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
184 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
185 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
186 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
187 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
188 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
189 { MA, 1 | F_NOMATCH, "^", "\n" },
190 { 0, 0, "^ab", "ab" },
191 { 0, 0 | F_NOMATCH, "^ab", "aab" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
197 { 0, 0, "ab$", "ab" },
198 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
199 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
200 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
201 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
202 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
203 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
204 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
205 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
206 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
207 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
208 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
209 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
210 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
211 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
212 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
213 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
214 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
215 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
218 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
219 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
220 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
225 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
226 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
227 { MA, 0, "\\Aa", "aaa" },
228 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
229 { MA, 1, "\\Ga", "aaa" },
230 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
231 { MA, 0, "a\\z", "aaa" },
232 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
233
234 /* Brackets. */
235 { MUA, 0, "(ab|bb|cd)", "bacde" },
236 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
237 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
238 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
239 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
240 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
241
242 /* Greedy and non-greedy ? operators. */
243 { MUA, 0, "(?:a)?a", "laab" },
244 { CMUA, 0, "(A)?A", "llaab" },
245 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
246 { MUA, 0, "(a)?a", "manm" },
247 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
248 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
249 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
250
251 /* Greedy and non-greedy + operators */
252 { MUA, 0, "(aa)+aa", "aaaaaaa" },
253 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
254 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
255 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
256 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
258 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
259
260 /* Greedy and non-greedy * operators */
261 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
262 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
263 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
264 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
265 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
266 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
267 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
268 { MA, 0, "((?:a|)*){0}a", "a" },
269
270 /* Combining ? + * operators */
271 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
272 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
274 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
275 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
276
277 /* Single character iterators. */
278 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
279 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
280 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
281 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
282 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
283 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
284 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
285 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
286 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
287 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
288 { MUA, 0, "(a?+[^b])+", "babaacacb" },
289 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
290 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
291 { CMUA, 0, "[c-f]+k", "DemmFke" },
292 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
293 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
294 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
295 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
296 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
297 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
298 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
299 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
300 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
301 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
302 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
303 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
304 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
305 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
306 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
307 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
309 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
310
311 /* Basic character sets. */
312 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
313 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
314 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
315 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
316 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
317 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
318
319 /* Unicode properties. */
320 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
321 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
322 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
323 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
324 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
325 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
326 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
327 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
328 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
329 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
330 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
331 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
332 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
333 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
334 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
335 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
336 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
337 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
338 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
339 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
340
341 /* Possible empty brackets. */
342 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
343 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
344 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
345 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
346 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
347 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
348 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
349 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
350 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
351 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
352
353 /* Start offset. */
354 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
355 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
356 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
357 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
358
359 /* Newline. */
360 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
361 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
362 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
363
364 /* Any character except newline or any newline. */
365 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
366 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
367 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
368 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
369 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
370 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
371 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
372 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
373 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
374 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
375 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
376 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
377 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
378 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
379 { MUA, 0, "\\R+", "ab\r\n\r" },
380 { MUA, 0, "\\R*", "ab\r\n\r" },
381 { MUA, 0, "\\R*", "\r\n\r" },
382 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
383 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
384 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
385 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
386 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
387 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
388 { MUA, 0, "\\R*\\R\\R", "\n\r" },
389 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
390 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
391
392 /* Atomic groups (no fallback from "next" direction). */
393 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
394 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
395 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
396 "bababcdedefgheijijklmlmnop" },
397 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
398 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
399 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
400 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
401 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
402 { MUA, 0, "(?>x|)*$", "aaa" },
403 { MUA, 0, "(?>(x)|)*$", "aaa" },
404 { MUA, 0, "(?>x|())*$", "aaa" },
405 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
406 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
407 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
408 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
409 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
410 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
411 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
412 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
413 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
414 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
415 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
416 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
417 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
418 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
419 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
420 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
421 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
422 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
423 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
424 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
425 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
426 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
427 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
428 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
429 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
430 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
431
432 /* Possessive quantifiers. */
433 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
434 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
435 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
436 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
437 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
438 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
439 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
440 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
441 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
442 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
443 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
444 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
445 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
446 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
447 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
448 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
449 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
450 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
451 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
452 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
453 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
454 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
455 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
456 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
457 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
458 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
459 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
460 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
461 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
462 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
463 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
464 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
465 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
466 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
467 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
468
469 /* Back references. */
470 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
471 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
472 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
473 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
474 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
475 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
476 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
477 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
478 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
479 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
480 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
481 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
482 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
483 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
484 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
485 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
486 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
487 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
488 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
489 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
490 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
491 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
492 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
493
494 /* Assertions. */
495 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
496 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
497 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
498 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
499 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
500 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
501 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
502 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
503 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
504 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
505 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
506 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
507 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
508 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
509 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
510 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
511 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
512 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
513 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
514 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
515 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
516 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
517 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
518 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
519
520 /* Not empty, ACCEPT, FAIL */
521 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
522 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
523 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
524 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
525 { MUA, 0, "a(*ACCEPT)b", "ab" },
526 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
527 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
528 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
529 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
530 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
531 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
532 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
533 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
534 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
535 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
536 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
537 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
538 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
539 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
540
541 /* Conditional blocks. */
542 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
543 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
544 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
545 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
548 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
549 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
550 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
551 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
552 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
553 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
554 { MUA, 0, "(?(?=a)ab)", "a" },
555 { MUA, 0, "(?(?<!b)c)", "b" },
556 { MUA, 0, "(?(DEFINE)a(b))", "a" },
557 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
558 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
559 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
560 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
561 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
562 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
563 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
564 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
566 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
567 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
568 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
569 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
570 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
571 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
572 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
573 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
574 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
575 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
576 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
577
578 /* Set start of match. */
579 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
580 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
581 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
582 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
583 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
584
585 /* First line. */
586 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
587 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
588 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
589 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
590 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
591 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
592 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
593 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
594 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
595 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
596 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
597 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
598 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
599 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
600 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
601 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
602 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
603 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
604 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
605
606 /* Recurse. */
607 { MUA, 0, "(a)(?1)", "aa" },
608 { MUA, 0, "((a))(?1)", "aa" },
609 { MUA, 0, "(b|a)(?1)", "aa" },
610 { MUA, 0, "(b|(a))(?1)", "aa" },
611 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
612 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
613 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
614 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
615 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
616 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
617 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
618 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
619 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
620 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
621 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
622 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
623 { MUA, 0, "b|<(?R)*>", "<<b>" },
624 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
625 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
626 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
627 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
628 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
629 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
630 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
631 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
632
633 /* 16 bit specific tests. */
634 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
635 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
636 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
637 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
638 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
639 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
640 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
641 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
642 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
643 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
645 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
646 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
647 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
648 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
649 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
650 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
651 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
652 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
653 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
654 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
655 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
656 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
657 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
658 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
659 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
660 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
661 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
662 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
663 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
664 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
665
666 /* Partial matching. */
667 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
668 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
669 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
670 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
671 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
672 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
673 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
674 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
675
676 /* (*MARK) verb. */
677 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
678 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
679 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
680 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
681 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
682 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
683 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
684 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
685 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
686 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
687 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
688 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
689 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
690 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
691 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
692
693 /* (*COMMIT) verb. */
694 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
695 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
696 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
697 { MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" },
698 { MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" },
699 { MUA, 0, "(?=(a)(*COMMIT)b)|ac", "ac" },
700 { MUA, 0, "(?=(a)+(*COMMIT)b)|ac", "ac" },
701 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
702 { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
703
704 /* (*PRUNE) verb. */
705 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
706 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
707 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
708 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
709 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
710 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
711 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
712 { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
713 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
714 { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
715 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
716 { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
717 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
718 { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
719 { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
720 { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
721 { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
722 { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
723 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
724 { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
725 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
726 { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
727 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
728 { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
729 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
730 { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
731 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
732 { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
733 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
734
735 /* (*THEN) verb. */
736 { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
737 { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
738 { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
739 { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
740 { MUA, 0 | F_NOMATCH, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
741
742 /* Deep recursion. */
743 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
744 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
745 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
746
747 /* Deep recursion: Stack limit reached. */
748 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
749 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
750 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
751 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
752 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
753
754 { 0, 0, NULL, NULL }
755 };
756
/* Hands out a private, heap-allocated copy of PCRE's default character
   tables. Using a copy rather than the library's built-in tables allows
   valgrind to report invalid reads and writes on them.

   mode == 0: return the cached copy, creating it on first use. NULL is
              returned when the default tables cannot be queried or the
              allocation fails.
   mode != 0: release the cached copy (if any) and return NULL. */
static const unsigned char *tables(int mode)
{
	/* The size of the tables cannot be queried through pcre_fullinfo, so it
	   is hard-coded. Since this is a test program, that is acceptable. */
	enum { TABLES_LENGTH = 1088 };
	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables = NULL;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode != 0) {
		/* free(NULL) is a no-op, so no guard is needed. */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy != NULL)
		return tables_copy;

	/* Compile an empty pattern only to ask it for the default tables. */
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre32_free(regex);
	}
#endif

	/* Shouldn't ever happen. */
	if (default_tables == NULL)
		return NULL;

	tables_copy = (unsigned char *)malloc(TABLES_LENGTH);
	if (tables_copy != NULL)
		memcpy(tables_copy, default_tables, TABLES_LENGTH);
	return tables_copy;
}
819
820 #ifdef SUPPORT_PCRE8
821 static pcre_jit_stack* callback8(void *arg)
822 {
823 return (pcre_jit_stack *)arg;
824 }
825 #endif
826
827 #ifdef SUPPORT_PCRE16
828 static pcre16_jit_stack* callback16(void *arg)
829 {
830 return (pcre16_jit_stack *)arg;
831 }
832 #endif
833
834 #ifdef SUPPORT_PCRE32
835 static pcre32_jit_stack* callback32(void *arg)
836 {
837 return (pcre32_jit_stack *)arg;
838 }
839 #endif
840
841 #ifdef SUPPORT_PCRE8
842 static pcre_jit_stack *stack8;
843
844 static pcre_jit_stack *getstack8(void)
845 {
846 if (!stack8)
847 stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
848 return stack8;
849 }
850
851 static void setstack8(pcre_extra *extra)
852 {
853 if (!extra) {
854 if (stack8)
855 pcre_jit_stack_free(stack8);
856 stack8 = NULL;
857 return;
858 }
859
860 pcre_assign_jit_stack(extra, callback8, getstack8());
861 }
862 #endif /* SUPPORT_PCRE8 */
863
864 #ifdef SUPPORT_PCRE16
865 static pcre16_jit_stack *stack16;
866
867 static pcre16_jit_stack *getstack16(void)
868 {
869 if (!stack16)
870 stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
871 return stack16;
872 }
873
874 static void setstack16(pcre16_extra *extra)
875 {
876 if (!extra) {
877 if (stack16)
878 pcre16_jit_stack_free(stack16);
879 stack16 = NULL;
880 return;
881 }
882
883 pcre16_assign_jit_stack(extra, callback16, getstack16());
884 }
885 #endif /* SUPPORT_PCRE16 */
886
887 #ifdef SUPPORT_PCRE32
888 static pcre32_jit_stack *stack32;
889
890 static pcre32_jit_stack *getstack32(void)
891 {
892 if (!stack32)
893 stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
894 return stack32;
895 }
896
897 static void setstack32(pcre32_extra *extra)
898 {
899 if (!extra) {
900 if (stack32)
901 pcre32_jit_stack_free(stack32);
902 stack32 = NULL;
903 return;
904 }
905
906 pcre32_assign_jit_stack(extra, callback32, getstack32());
907 }
908 #endif /* SUPPORT_PCRE32 */
909
910 #ifdef SUPPORT_PCRE16
911
912 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
913 {
914 unsigned char *iptr = (unsigned char*)input;
915 PCRE_UCHAR16 *optr = output;
916 unsigned int c;
917
918 if (max_length == 0)
919 return 0;
920
921 while (*iptr && max_length > 1) {
922 c = 0;
923 if (offsetmap)
924 *offsetmap++ = (int)(iptr - (unsigned char*)input);
925
926 if (!(*iptr & 0x80))
927 c = *iptr++;
928 else if (!(*iptr & 0x20)) {
929 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
930 iptr += 2;
931 } else if (!(*iptr & 0x10)) {
932 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
933 iptr += 3;
934 } else if (!(*iptr & 0x08)) {
935 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
936 iptr += 4;
937 }
938
939 if (c < 65536) {
940 *optr++ = c;
941 max_length--;
942 } else if (max_length <= 2) {
943 *optr = '\0';
944 return (int)(optr - output);
945 } else {
946 c -= 0x10000;
947 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
948 *optr++ = 0xdc00 | (c & 0x3ff);
949 max_length -= 2;
950 if (offsetmap)
951 offsetmap++;
952 }
953 }
954 if (offsetmap)
955 *offsetmap = (int)(iptr - (unsigned char*)input);
956 *optr = '\0';
957 return (int)(optr - output);
958 }
959
960 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
961 {
962 unsigned char *iptr = (unsigned char*)input;
963 PCRE_UCHAR16 *optr = output;
964
965 if (max_length == 0)
966 return 0;
967
968 while (*iptr && max_length > 1) {
969 *optr++ = *iptr++;
970 max_length--;
971 }
972 *optr = '\0';
973 return (int)(optr - output);
974 }
975
976 #define REGTEST_MAX_LENGTH16 4096
977 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
978 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
979
980 #endif /* SUPPORT_PCRE16 */
981
982 #ifdef SUPPORT_PCRE32
983
984 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
985 {
986 unsigned char *iptr = (unsigned char*)input;
987 PCRE_UCHAR32 *optr = output;
988 unsigned int c;
989
990 if (max_length == 0)
991 return 0;
992
993 while (*iptr && max_length > 1) {
994 c = 0;
995 if (offsetmap)
996 *offsetmap++ = (int)(iptr - (unsigned char*)input);
997
998 if (!(*iptr & 0x80))
999 c = *iptr++;
1000 else if (!(*iptr & 0x20)) {
1001 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1002 iptr += 2;
1003 } else if (!(*iptr & 0x10)) {
1004 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1005 iptr += 3;
1006 } else if (!(*iptr & 0x08)) {
1007 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1008 iptr += 4;
1009 }
1010
1011 *optr++ = c;
1012 max_length--;
1013 }
1014 if (offsetmap)
1015 *offsetmap = (int)(iptr - (unsigned char*)input);
1016 *optr = 0;
1017 return (int)(optr - output);
1018 }
1019
1020 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
1021 {
1022 unsigned char *iptr = (unsigned char*)input;
1023 PCRE_UCHAR32 *optr = output;
1024
1025 if (max_length == 0)
1026 return 0;
1027
1028 while (*iptr && max_length > 1) {
1029 *optr++ = *iptr++;
1030 max_length--;
1031 }
1032 *optr = '\0';
1033 return (int)(optr - output);
1034 }
1035
1036 #define REGTEST_MAX_LENGTH32 4096
1037 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1038 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1039
1040 #endif /* SUPPORT_PCRE32 */
1041
/* Returns 1 when every byte of the NUL-terminated string `input` has a
   value of at most 127, and 0 as soon as a larger byte is found. An empty
   string yields 1. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != '\0'; cursor++) {
		if (*cursor >= 128)
			return 0;
	}
	return 1;
}
1052
1053 static int regression_tests(void)
1054 {
1055 struct regression_test_case *current = regression_test_cases;
1056 const char *error;
1057 char *cpu_info;
1058 int i, err_offs;
1059 int is_successful, is_ascii_pattern, is_ascii_input;
1060 int total = 0;
1061 int successful = 0;
1062 int successful_row = 0;
1063 int counter = 0;
1064 int study_mode;
1065 int utf = 0, ucp = 0;
1066 int disabled_flags = 0;
1067 #ifdef SUPPORT_PCRE8
1068 pcre *re8;
1069 pcre_extra *extra8;
1070 pcre_extra dummy_extra8;
1071 int ovector8_1[32];
1072 int ovector8_2[32];
1073 int return_value8[2];
1074 unsigned char *mark8_1, *mark8_2;
1075 #endif
1076 #ifdef SUPPORT_PCRE16
1077 pcre16 *re16;
1078 pcre16_extra *extra16;
1079 pcre16_extra dummy_extra16;
1080 int ovector16_1[32];
1081 int ovector16_2[32];
1082 int return_value16[2];
1083 PCRE_UCHAR16 *mark16_1, *mark16_2;
1084 int length16;
1085 #endif
1086 #ifdef SUPPORT_PCRE32
1087 pcre32 *re32;
1088 pcre32_extra *extra32;
1089 pcre32_extra dummy_extra32;
1090 int ovector32_1[32];
1091 int ovector32_2[32];
1092 int return_value32[2];
1093 PCRE_UCHAR32 *mark32_1, *mark32_2;
1094 int length32;
1095 #endif
1096
1097 /* This test compares the behaviour of interpreter and JIT. Although disabling
1098 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1099 still considered successful from pcre_jit_test point of view. */
1100
1101 #if defined SUPPORT_PCRE8
1102 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1103 #elif defined SUPPORT_PCRE16
1104 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1105 #elif defined SUPPORT_PCRE32
1106 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1107 #endif
1108
1109 printf("Running JIT regression tests\n");
1110 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1111
1112 #if defined SUPPORT_PCRE8
1113 pcre_config(PCRE_CONFIG_UTF8, &utf);
1114 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1115 #elif defined SUPPORT_PCRE16
1116 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1117 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1118 #elif defined SUPPORT_PCRE16
1119 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1120 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1121 #endif
1122
1123 if (!utf)
1124 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1125 if (!ucp)
1126 disabled_flags |= PCRE_UCP;
1127 #ifdef SUPPORT_PCRE8
1128 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1129 #endif
1130 #ifdef SUPPORT_PCRE16
1131 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1132 #endif
1133 #ifdef SUPPORT_PCRE32
1134 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1135 #endif
1136
1137 while (current->pattern) {
1138 /* printf("\nPattern: %s :\n", current->pattern); */
1139 total++;
1140 if (current->start_offset & F_PROPERTY) {
1141 is_ascii_pattern = 0;
1142 is_ascii_input = 0;
1143 } else {
1144 is_ascii_pattern = check_ascii(current->pattern);
1145 is_ascii_input = check_ascii(current->input);
1146 }
1147
1148 if (current->flags & PCRE_PARTIAL_SOFT)
1149 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1150 else if (current->flags & PCRE_PARTIAL_HARD)
1151 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1152 else
1153 study_mode = PCRE_STUDY_JIT_COMPILE;
1154 error = NULL;
1155 #ifdef SUPPORT_PCRE8
1156 re8 = NULL;
1157 if (!(current->start_offset & F_NO8))
1158 re8 = pcre_compile(current->pattern,
1159 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1160 &error, &err_offs, tables(0));
1161
1162 extra8 = NULL;
1163 if (re8) {
1164 error = NULL;
1165 extra8 = pcre_study(re8, study_mode, &error);
1166 if (!extra8) {
1167 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1168 pcre_free(re8);
1169 re8 = NULL;
1170 }
1171 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1172 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1173 pcre_free_study(extra8);
1174 pcre_free(re8);
1175 re8 = NULL;
1176 }
1177 extra8->flags |= PCRE_EXTRA_MARK;
1178 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1179 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1180 #endif
1181 #ifdef SUPPORT_PCRE16
1182 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1183 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1184 else
1185 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1186
1187 re16 = NULL;
1188 if (!(current->start_offset & F_NO16))
1189 re16 = pcre16_compile(regtest_buf16,
1190 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1191 &error, &err_offs, tables(0));
1192
1193 extra16 = NULL;
1194 if (re16) {
1195 error = NULL;
1196 extra16 = pcre16_study(re16, study_mode, &error);
1197 if (!extra16) {
1198 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1199 pcre16_free(re16);
1200 re16 = NULL;
1201 }
1202 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1203 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1204 pcre16_free_study(extra16);
1205 pcre16_free(re16);
1206 re16 = NULL;
1207 }
1208 extra16->flags |= PCRE_EXTRA_MARK;
1209 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1210 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1211 #endif
1212 #ifdef SUPPORT_PCRE32
1213 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1214 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1215 else
1216 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1217
1218 re32 = NULL;
1219 if (!(current->start_offset & F_NO32))
1220 re32 = pcre32_compile(regtest_buf32,
1221 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1222 &error, &err_offs, tables(0));
1223
1224 extra32 = NULL;
1225 if (re32) {
1226 error = NULL;
1227 extra32 = pcre32_study(re32, study_mode, &error);
1228 if (!extra32) {
1229 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1230 pcre32_free(re32);
1231 re32 = NULL;
1232 }
1233 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1234 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1235 pcre32_free_study(extra32);
1236 pcre32_free(re32);
1237 re32 = NULL;
1238 }
1239 extra32->flags |= PCRE_EXTRA_MARK;
1240 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1241 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1242 #endif
1243
1244 counter++;
1245 if ((counter & 0x3) != 0) {
1246 #ifdef SUPPORT_PCRE8
1247 setstack8(NULL);
1248 #endif
1249 #ifdef SUPPORT_PCRE16
1250 setstack16(NULL);
1251 #endif
1252 #ifdef SUPPORT_PCRE32
1253 setstack32(NULL);
1254 #endif
1255 }
1256
1257 #ifdef SUPPORT_PCRE8
1258 return_value8[0] = -1000;
1259 return_value8[1] = -1000;
1260 for (i = 0; i < 32; ++i)
1261 ovector8_1[i] = -2;
1262 for (i = 0; i < 32; ++i)
1263 ovector8_2[i] = -2;
1264 if (re8) {
1265 mark8_1 = NULL;
1266 mark8_2 = NULL;
1267 extra8->mark = &mark8_1;
1268
1269 if ((counter & 0x1) != 0) {
1270 setstack8(extra8);
1271 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1272 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1273 } else
1274 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1275 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
1276 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1277 dummy_extra8.flags = PCRE_EXTRA_MARK;
1278 if (current->start_offset & F_STUDY) {
1279 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1280 dummy_extra8.study_data = extra8->study_data;
1281 }
1282 dummy_extra8.mark = &mark8_2;
1283 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1284 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1285 }
1286 #endif
1287
1288 #ifdef SUPPORT_PCRE16
1289 return_value16[0] = -1000;
1290 return_value16[1] = -1000;
1291 for (i = 0; i < 32; ++i)
1292 ovector16_1[i] = -2;
1293 for (i = 0; i < 32; ++i)
1294 ovector16_2[i] = -2;
1295 if (re16) {
1296 mark16_1 = NULL;
1297 mark16_2 = NULL;
1298 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1299 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1300 else
1301 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1302 extra16->mark = &mark16_1;
1303 if ((counter & 0x1) != 0) {
1304 setstack16(extra16);
1305 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1306 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1307 } else
1308 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1309 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
1310 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1311 dummy_extra16.flags = PCRE_EXTRA_MARK;
1312 if (current->start_offset & F_STUDY) {
1313 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1314 dummy_extra16.study_data = extra16->study_data;
1315 }
1316 dummy_extra16.mark = &mark16_2;
1317 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1318 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1319 }
1320 #endif
1321
1322 #ifdef SUPPORT_PCRE32
1323 return_value32[0] = -1000;
1324 return_value32[1] = -1000;
1325 for (i = 0; i < 32; ++i)
1326 ovector32_1[i] = -2;
1327 for (i = 0; i < 32; ++i)
1328 ovector32_2[i] = -2;
1329 if (re32) {
1330 mark32_1 = NULL;
1331 mark32_2 = NULL;
1332 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1333 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1334 else
1335 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1336 extra32->mark = &mark32_1;
1337 if ((counter & 0x1) != 0) {
1338 setstack32(extra32);
1339 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1340 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1341 } else
1342 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1343 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
1344 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1345 dummy_extra32.flags = PCRE_EXTRA_MARK;
1346 if (current->start_offset & F_STUDY) {
1347 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1348 dummy_extra32.study_data = extra32->study_data;
1349 }
1350 dummy_extra32.mark = &mark32_2;
1351 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1352 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1353 }
1354 #endif
1355
1356 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1357 return_value8[0], return_value16[0],
1358 ovector8_1[0], ovector8_1[1],
1359 ovector16_1[0], ovector16_1[1],
1360 ovector32_1[0], ovector32_1[1],
1361 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1362
1363 /* If F_DIFF is set, just run the test, but do not compare the results.
1364 Segfaults can still be captured. */
1365
1366 is_successful = 1;
1367 if (!(current->start_offset & F_DIFF)) {
1368 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1369 if (!(current->start_offset & F_FORCECONV)) {
1370 int return_value;
1371
1372 /* All results must be the same. */
1373 #ifdef SUPPORT_PCRE8
1374 if ((return_value = return_value8[0]) != return_value8[1]) {
1375 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1376 return_value8[0], return_value8[1], total, current->pattern, current->input);
1377 is_successful = 0;
1378 } else
1379 #endif
1380 #ifdef SUPPORT_PCRE16
1381 if ((return_value = return_value16[0]) != return_value16[1]) {
1382 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1383 return_value16[0], return_value16[1], total, current->pattern, current->input);
1384 is_successful = 0;
1385 } else
1386 #endif
1387 #ifdef SUPPORT_PCRE32
1388 if ((return_value = return_value32[0]) != return_value32[1]) {
1389 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1390 return_value32[0], return_value32[1], total, current->pattern, current->input);
1391 is_successful = 0;
1392 } else
1393 #endif
1394 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1395 if (return_value8[0] != return_value16[0]) {
1396 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1397 return_value8[0], return_value16[0],
1398 total, current->pattern, current->input);
1399 is_successful = 0;
1400 } else
1401 #endif
1402 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1403 if (return_value8[0] != return_value32[0]) {
1404 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1405 return_value8[0], return_value32[0],
1406 total, current->pattern, current->input);
1407 is_successful = 0;
1408 } else
1409 #endif
1410 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1411 if (return_value16[0] != return_value32[0]) {
1412 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1413 return_value16[0], return_value32[0],
1414 total, current->pattern, current->input);
1415 is_successful = 0;
1416 } else
1417 #endif
1418 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1419 if (return_value == PCRE_ERROR_PARTIAL) {
1420 return_value = 2;
1421 } else {
1422 return_value *= 2;
1423 }
1424 #ifdef SUPPORT_PCRE8
1425 return_value8[0] = return_value;
1426 #endif
1427 #ifdef SUPPORT_PCRE16
1428 return_value16[0] = return_value;
1429 #endif
1430 #ifdef SUPPORT_PCRE32
1431 return_value32[0] = return_value;
1432 #endif
1433 /* Transform back the results. */
1434 if (current->flags & PCRE_UTF8) {
1435 #ifdef SUPPORT_PCRE16
1436 for (i = 0; i < return_value; ++i) {
1437 if (ovector16_1[i] >= 0)
1438 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1439 if (ovector16_2[i] >= 0)
1440 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1441 }
1442 #endif
1443 #ifdef SUPPORT_PCRE32
1444 for (i = 0; i < return_value; ++i) {
1445 if (ovector32_1[i] >= 0)
1446 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1447 if (ovector32_2[i] >= 0)
1448 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1449 }
1450 #endif
1451 }
1452
1453 for (i = 0; i < return_value; ++i) {
1454 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1455 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1456 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1457 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1458 total, current->pattern, current->input);
1459 is_successful = 0;
1460 }
1461 #endif
1462 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1463 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1464 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1465 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1466 total, current->pattern, current->input);
1467 is_successful = 0;
1468 }
1469 #endif
1470 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1471 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1472 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1473 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1474 total, current->pattern, current->input);
1475 is_successful = 0;
1476 }
1477 #endif
1478 }
1479 }
1480 } else
1481 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1482 {
1483 /* Within each bit width (8, 16 and 32) the two results must be equal; different widths are not compared with each other here. */
1484 #ifdef SUPPORT_PCRE8
1485 if (return_value8[0] != return_value8[1]) {
1486 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1487 return_value8[0], return_value8[1], total, current->pattern, current->input);
1488 is_successful = 0;
1489 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1490 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1491 return_value8[0] = 2;
1492 else
1493 return_value8[0] *= 2;
1494
1495 for (i = 0; i < return_value8[0]; ++i)
1496 if (ovector8_1[i] != ovector8_2[i]) {
1497 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1498 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1499 is_successful = 0;
1500 }
1501 }
1502 #endif
1503
1504 #ifdef SUPPORT_PCRE16
1505 if (return_value16[0] != return_value16[1]) {
1506 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1507 return_value16[0], return_value16[1], total, current->pattern, current->input);
1508 is_successful = 0;
1509 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1510 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1511 return_value16[0] = 2;
1512 else
1513 return_value16[0] *= 2;
1514
1515 for (i = 0; i < return_value16[0]; ++i)
1516 if (ovector16_1[i] != ovector16_2[i]) {
1517 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1518 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1519 is_successful = 0;
1520 }
1521 }
1522 #endif
1523
1524 #ifdef SUPPORT_PCRE32
1525 if (return_value32[0] != return_value32[1]) {
1526 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1527 return_value32[0], return_value32[1], total, current->pattern, current->input);
1528 is_successful = 0;
1529 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1530 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1531 return_value32[0] = 2;
1532 else
1533 return_value32[0] *= 2;
1534
1535 for (i = 0; i < return_value32[0]; ++i)
1536 if (ovector32_1[i] != ovector32_2[i]) {
1537 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1538 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1539 is_successful = 0;
1540 }
1541 }
1542 #endif
1543 }
1544 }
1545
1546 if (is_successful) {
1547 #ifdef SUPPORT_PCRE8
1548 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1549 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1550 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1551 total, current->pattern, current->input);
1552 is_successful = 0;
1553 }
1554
1555 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1556 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1557 total, current->pattern, current->input);
1558 is_successful = 0;
1559 }
1560 }
1561 #endif
1562 #ifdef SUPPORT_PCRE16
1563 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1564 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1565 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1566 total, current->pattern, current->input);
1567 is_successful = 0;
1568 }
1569
1570 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1571 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1572 total, current->pattern, current->input);
1573 is_successful = 0;
1574 }
1575 }
1576 #endif
1577 #ifdef SUPPORT_PCRE32
1578 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1579 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1580 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1581 total, current->pattern, current->input);
1582 is_successful = 0;
1583 }
1584
1585 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1586 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1587 total, current->pattern, current->input);
1588 is_successful = 0;
1589 }
1590 }
1591 #endif
1592 }
1593
1594 if (is_successful) {
1595 #ifdef SUPPORT_PCRE8
1596 if (mark8_1 != mark8_2) {
1597 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1598 total, current->pattern, current->input);
1599 is_successful = 0;
1600 }
1601 #endif
1602 #ifdef SUPPORT_PCRE16
1603 if (mark16_1 != mark16_2) {
1604 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1605 total, current->pattern, current->input);
1606 is_successful = 0;
1607 }
1608 #endif
1609 #ifdef SUPPORT_PCRE32
1610 if (mark32_1 != mark32_2) {
1611 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1612 total, current->pattern, current->input);
1613 is_successful = 0;
1614 }
1615 #endif
1616 }
1617
1618 #ifdef SUPPORT_PCRE8
1619 if (re8) {
1620 pcre_free_study(extra8);
1621 pcre_free(re8);
1622 }
1623 #endif
1624 #ifdef SUPPORT_PCRE16
1625 if (re16) {
1626 pcre16_free_study(extra16);
1627 pcre16_free(re16);
1628 }
1629 #endif
1630 #ifdef SUPPORT_PCRE32
1631 if (re32) {
1632 pcre32_free_study(extra32);
1633 pcre32_free(re32);
1634 }
1635 #endif
1636
1637 if (is_successful) {
1638 successful++;
1639 successful_row++;
1640 printf(".");
1641 if (successful_row >= 60) {
1642 successful_row = 0;
1643 printf("\n");
1644 }
1645 } else
1646 successful_row = 0;
1647
1648 fflush(stdout);
1649 current++;
1650 }
1651 tables(1);
1652 #ifdef SUPPORT_PCRE8
1653 setstack8(NULL);
1654 #endif
1655 #ifdef SUPPORT_PCRE16
1656 setstack16(NULL);
1657 #endif
1658 #ifdef SUPPORT_PCRE32
1659 setstack32(NULL);
1660 #endif
1661
1662 if (total == successful) {
1663 printf("\nAll JIT regression tests are successfully passed.\n");
1664 return 0;
1665 } else {
1666 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1667 return 1;
1668 }
1669 }
1670
1671 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5