/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1300 - (show annotations)
Mon Mar 25 12:34:26 2013 UTC (6 years, 5 months ago) by zherczeg
File MIME type: text/plain
File size: 67991 byte(s)
Control verbs are handled in the same way in JIT and interpreter.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
/* Marker bit 0x80000000 (the top bit of a 32-bit word).
   NOTE(review): nothing in the visible part of this file references it;
   confirm its purpose against the code that consumes the test table. */
#define PCRE_BUG	0x80000000
55
56 /*
57 Letter characters:
58 \xe6\x92\xad = 0x64ad = 25773 (kanji)
59 Non-letter characters:
60 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
61 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64 Newlines:
65 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
66 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67 Othercase pairs:
68 \xc3\xa9 = 0xe9 = 233 (e')
69 \xc3\x89 = 0xc9 = 201 (E')
70 \xc3\xa1 = 0xe1 = 225 (a')
71 \xc3\x81 = 0xc1 = 193 (A')
72 \xc8\xba = 0x23a = 570
73 \xe2\xb1\xa5 = 0x2c65 = 11365
74 \xe1\xbd\xb8 = 0x1f78 = 8056
75 \xe1\xbf\xb8 = 0x1ff8 = 8184
76 \xf0\x90\x90\x80 = 0x10400 = 66560
77 \xf0\x90\x90\xa8 = 0x10428 = 66600
78 Mark property:
79 \xcc\x8d = 0x30d = 781
80 Special:
81 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86 */
87
/* Test driver; its definition is not in this part of the file. Its return
   value is used directly as the process exit status by main(). */
static int regression_tests(void);

/*
 * Program entry point.
 *
 * Asks the library whether JIT support is available, using the config call
 * of the first enabled code unit width (8-bit, then 16-bit, then 32-bit).
 * "jit" starts at 0, so if the query does not store a value the program
 * behaves as if JIT is unavailable.
 *
 * Returns 1 after printing a message when JIT is not available; otherwise
 * returns whatever regression_tests() returns.
 */
int main(void)
{
	int jit = 0;

#if defined SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE16
	pcre16_config(PCRE_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE32
	pcre32_config(PCRE_CONFIG_JIT, &jit);
#endif

	if (jit)
		return regression_tests();

	printf("JIT must be enabled to run pcre_jit_test\n");
	return 1;
}
106
107 /* --------------------------------------------------------------------------------------- */
108
109 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
110 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111 #endif
112
/*
 * Shorthand option sets used in the "flags" column of the test table.
 * Each letter of a name stands for one PCRE option bit:
 *   C = PCRE_CASELESS
 *   M = PCRE_MULTILINE
 *   U = PCRE_UTF8
 *   A = PCRE_NEWLINE_ANYCRLF
 *   P = PCRE_UCP
 */
#define MUA	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define MUAP	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define CMUAP	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
120
/*
 * Bit layout of the "start_offset" column of the test table. The table
 * entries OR the F_* bits below into a small numeric offset, and
 * OFFSET_MASK covers the low 16 bits that none of the F_* bits touch.
 *
 * The code that interprets these bits is not in view here, so the per-flag
 * notes are read off the names and the table entries - TODO confirm.
 *
 * NOTE(review): F_NO32 has the same value as F_NO16, so the two cannot be
 * told apart. Left as is; confirm this is intended before changing it.
 */
#define OFFSET_MASK	0x00ffff	/* low 16 bits: the numeric start offset */
#define F_NO8		0x010000	/* presumably: skip in 8 bit mode */
#define F_NO16		0x020000	/* presumably: skip in 16 bit mode */
#define F_NO32		0x020000	/* presumably: skip in 32 bit mode (same bit as F_NO16) */
#define F_NOMATCH	0x040000	/* set on cases that look like they must not match */
#define F_DIFF		0x080000	/* presumably: JIT and interpreter results may differ */
#define F_FORCECONV	0x100000	/* used by the "16 bit specific tests" entries */
#define F_PROPERTY	0x200000	/* set on patterns using \p, \P or \X */
#define F_STUDY		0x400000	/* meaning not visible here */
130
/*
 * One row of the regression test table.
 *
 * The table is filled with positional initializers, so the order of the
 * members must not change.
 */
struct regression_test_case {
	/* PCRE option bits, e.g. MUA, CMA or explicit PCRE_* combinations. */
	int flags;
	/* Numeric start offset in the low bits, with F_* bits OR-ed on top
	   (e.g. "1 | F_NOMATCH"); OFFSET_MASK covers the numeric part. */
	int start_offset;
	/* Regular expression, written as a C string literal. */
	const char *pattern;
	/* Subject string the pattern is run against. */
	const char *input;
};
137
138 static struct regression_test_case regression_test_cases[] = {
139 /* Constant strings. */
140 { MUA, 0, "AbC", "AbAbC" },
141 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
142 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
143 { MA, 0, "[^a]", "aAbB" },
144 { CMA, 0, "[^m]", "mMnN" },
145 { MA, 0, "a[^b][^#]", "abacd" },
146 { CMA, 0, "A[^B][^E]", "abacd" },
147 { CMUA, 0, "[^x][^#]", "XxBll" },
148 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
149 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
150 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
151 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
152 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
153 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
154 { MUA, 0, "[axd]", "sAXd" },
155 { CMUA, 0, "[axd]", "sAXd" },
156 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
157 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
158 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
159 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
161 { MUA, 0, "[^a]", "\xc2\x80[]" },
162 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
163 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
164 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
165 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
166 { PCRE_CASELESS, 0, "a1", "Aa1" },
167 { MA, 0, "\\Ca", "cda" },
168 { CMA, 0, "\\Ca", "CDA" },
169 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
170 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
171 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
174 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
175
176 /* Assertions. */
177 { MUA, 0, "\\b[^A]", "A_B#" },
178 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
179 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
180 { MAP, 0, "\\B", "_\xa1" },
181 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
182 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
183 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
184 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
185 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
186 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
187 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
188 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
189 { MA, 1 | F_NOMATCH, "^", "\n" },
190 { 0, 0, "^ab", "ab" },
191 { 0, 0 | F_NOMATCH, "^ab", "aab" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
197 { 0, 0, "ab$", "ab" },
198 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
199 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
200 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
201 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
202 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
203 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
204 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
205 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
206 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
207 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
208 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
209 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
210 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
211 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
212 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
213 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
214 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
215 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
218 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
219 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
220 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
225 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
226 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
227 { MA, 0, "\\Aa", "aaa" },
228 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
229 { MA, 1, "\\Ga", "aaa" },
230 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
231 { MA, 0, "a\\z", "aaa" },
232 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
233
234 /* Brackets. */
235 { MUA, 0, "(ab|bb|cd)", "bacde" },
236 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
237 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
238 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
239 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
240 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
241
242 /* Greedy and non-greedy ? operators. */
243 { MUA, 0, "(?:a)?a", "laab" },
244 { CMUA, 0, "(A)?A", "llaab" },
245 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
246 { MUA, 0, "(a)?a", "manm" },
247 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
248 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
249 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
250
251 /* Greedy and non-greedy + operators */
252 { MUA, 0, "(aa)+aa", "aaaaaaa" },
253 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
254 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
255 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
256 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
258 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
259
260 /* Greedy and non-greedy * operators */
261 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
262 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
263 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
264 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
265 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
266 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
267 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
268 { MA, 0, "((?:a|)*){0}a", "a" },
269
270 /* Combining ? + * operators */
271 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
272 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
274 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
275 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
276
277 /* Single character iterators. */
278 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
279 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
280 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
281 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
282 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
283 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
284 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
285 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
286 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
287 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
288 { MUA, 0, "(a?+[^b])+", "babaacacb" },
289 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
290 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
291 { CMUA, 0, "[c-f]+k", "DemmFke" },
292 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
293 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
294 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
295 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
296 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
297 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
298 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
299 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
300 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
301 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
302 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
303 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
304 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
305 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
306 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
307 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
309 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
310
311 /* Basic character sets. */
312 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
313 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
314 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
315 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
316 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
317 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
318
319 /* Unicode properties. */
320 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
321 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
322 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
323 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
324 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
325 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
326 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
327 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
328 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
329 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
330 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
331 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
332 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
333 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
334 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
335 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
336 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
337 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
338 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
339 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
340
341 /* Possible empty brackets. */
342 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
343 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
344 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
345 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
346 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
347 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
348 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
349 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
350 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
351 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
352
353 /* Start offset. */
354 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
355 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
356 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
357 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
358
359 /* Newline. */
360 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
361 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
362 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
363
364 /* Any character except newline or any newline. */
365 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
366 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
367 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
368 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
369 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
370 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
371 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
372 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
373 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
374 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
375 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
376 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
377 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
378 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
379 { MUA, 0, "\\R+", "ab\r\n\r" },
380 { MUA, 0, "\\R*", "ab\r\n\r" },
381 { MUA, 0, "\\R*", "\r\n\r" },
382 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
383 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
384 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
385 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
386 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
387 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
388 { MUA, 0, "\\R*\\R\\R", "\n\r" },
389 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
390 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
391
392 /* Atomic groups (no fallback from "next" direction). */
393 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
394 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
395 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
396 "bababcdedefgheijijklmlmnop" },
397 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
398 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
399 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
400 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
401 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
402 { MUA, 0, "(?>x|)*$", "aaa" },
403 { MUA, 0, "(?>(x)|)*$", "aaa" },
404 { MUA, 0, "(?>x|())*$", "aaa" },
405 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
406 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
407 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
408 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
409 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
410 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
411 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
412 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
413 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
414 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
415 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
416 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
417 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
418 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
419 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
420 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
421 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
422 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
423 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
424 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
425 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
426 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
427 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
428 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
429 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
430 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
431
432 /* Possessive quantifiers. */
433 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
434 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
435 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
436 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
437 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
438 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
439 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
440 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
441 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
442 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
443 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
444 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
445 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
446 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
447 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
448 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
449 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
450 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
451 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
452 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
453 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
454 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
455 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
456 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
457 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
458 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
459 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
460 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
461 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
462 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
463 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
464 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
465 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
466 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
467 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
468
469 /* Back references. */
470 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
471 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
472 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
473 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
474 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
475 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
476 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
477 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
478 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
479 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
480 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
481 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
482 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
483 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
484 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
485 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
486 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
487 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
488 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
489 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
490 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
491 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
492 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
493
494 /* Assertions. */
495 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
496 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
497 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
498 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
499 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
500 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
501 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
502 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
503 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
504 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
505 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
506 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
507 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
508 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
509 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
510 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
511 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
512 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
513 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
514 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
515 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
516 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
517 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
518 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
519
520 /* Not empty, ACCEPT, FAIL */
521 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
522 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
523 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
524 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
525 { MUA, 0, "a(*ACCEPT)b", "ab" },
526 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
527 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
528 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
529 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
530 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
531 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
532 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
533 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
534 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
535 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
536 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
537 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
538 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
539 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
540
541 /* Conditional blocks. */
542 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
543 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
544 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
545 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
548 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
549 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
550 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
551 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
552 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
553 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
554 { MUA, 0, "(?(?=a)ab)", "a" },
555 { MUA, 0, "(?(?<!b)c)", "b" },
556 { MUA, 0, "(?(DEFINE)a(b))", "a" },
557 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
558 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
559 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
560 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
561 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
562 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
563 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
564 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
566 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
567 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
568 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
569 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
570 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
571 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
572 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
573 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
574 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
575 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
576 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
577
578 /* Set start of match. */
579 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
580 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
581 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
582 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
583 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
584
585 /* First line. */
586 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
587 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
588 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
589 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
590 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
591 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
592 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
593 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
594 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
595 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
596 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
597 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
598 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
599 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
600 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
601 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
602 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
603 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
604 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
605
606 /* Recurse. */
607 { MUA, 0, "(a)(?1)", "aa" },
608 { MUA, 0, "((a))(?1)", "aa" },
609 { MUA, 0, "(b|a)(?1)", "aa" },
610 { MUA, 0, "(b|(a))(?1)", "aa" },
611 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
612 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
613 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
614 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
615 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
616 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
617 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
618 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
619 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
620 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
621 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
622 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
623 { MUA, 0, "b|<(?R)*>", "<<b>" },
624 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
625 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
626 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
627 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
628 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
629 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
630 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
631 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
632
633 /* 16 bit specific tests. */
634 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
635 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
636 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
637 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
638 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
639 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
640 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
641 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
642 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
643 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
645 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
646 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
647 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
648 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
649 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
650 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
651 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
652 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
653 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
654 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
655 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
656 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
657 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
658 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
659 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
660 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
661 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
662 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
663 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
664 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
665
666 /* Partial matching. */
667 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
668 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
669 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
670 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
671 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
672 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
673 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
674 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
675
676 /* (*MARK) verb. */
677 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
678 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
679 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
680 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
681 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
682 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
683 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
684 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
685 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
686 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
687 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
688 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
689 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
690 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
691 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
692
693 /* (*COMMIT) verb. */
694 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
695 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
696 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
697 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
698 { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
699 { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
700
701 /* (*PRUNE) verb. */
702 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
703 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
704 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
705 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
706 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
707 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
708 { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
709 { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
710 { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
711 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
712 { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
713 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
714 { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
715 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
716 { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
717 { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
718 { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
719 { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
720 { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
721 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
722 { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
723 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
724 { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
725 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
726 { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
727 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
728 { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
729 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
730 { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
731 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
732
733 /* (*SKIP) verb. */
734 { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
735
736 /* (*THEN) verb. */
737 { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
738 { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
739 { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
740 { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
741 { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
742 { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
743 { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
744 { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
745 { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
746
747 /* Deep recursion. */
748 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
749 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
750 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
751
752 /* Deep recursion: Stack limit reached. */
753 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
754 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
755 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
756 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
757 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
758
759 { 0, 0, NULL, NULL }
760 };
761
/* Returns a private, heap allocated copy of PCRE's default character tables.
   Handing PCRE a malloc'ed copy allows valgrind to report invalid reads and
   writes on the tables.

   mode == 0: return the cached copy, building it on the first call. NULL is
              returned when the default tables cannot be obtained or the
              allocation fails.
   mode != 0: release the cached copy and return NULL. */
static const unsigned char *tables(int mode)
{
	/* Number of bytes copied from the default tables. pcre_fullinfo cannot
	   report this value, so it is hard-coded; since this is a test program,
	   that is acceptable at the moment. */
	enum { TABLES_LENGTH = 1088 };
	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables = NULL;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode != 0) {
		/* Cleanup request: drop the cached copy (free(NULL) is a no-op). */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy != NULL)
		return tables_copy;

	/* Compile an empty pattern with the built-in tables, then ask the
	   compiled pattern where those tables live. */
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre32_free(regex);
	}
#endif

	/* Shouldn't ever happen. */
	if (default_tables == NULL)
		return NULL;

	tables_copy = malloc(TABLES_LENGTH);
	if (tables_copy != NULL)
		memcpy(tables_copy, default_tables, TABLES_LENGTH);

	return tables_copy;
}
824
825 #ifdef SUPPORT_PCRE8
826 static pcre_jit_stack* callback8(void *arg)
827 {
828 return (pcre_jit_stack *)arg;
829 }
830 #endif
831
832 #ifdef SUPPORT_PCRE16
833 static pcre16_jit_stack* callback16(void *arg)
834 {
835 return (pcre16_jit_stack *)arg;
836 }
837 #endif
838
839 #ifdef SUPPORT_PCRE32
840 static pcre32_jit_stack* callback32(void *arg)
841 {
842 return (pcre32_jit_stack *)arg;
843 }
844 #endif
845
846 #ifdef SUPPORT_PCRE8
847 static pcre_jit_stack *stack8;
848
849 static pcre_jit_stack *getstack8(void)
850 {
851 if (!stack8)
852 stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
853 return stack8;
854 }
855
856 static void setstack8(pcre_extra *extra)
857 {
858 if (!extra) {
859 if (stack8)
860 pcre_jit_stack_free(stack8);
861 stack8 = NULL;
862 return;
863 }
864
865 pcre_assign_jit_stack(extra, callback8, getstack8());
866 }
867 #endif /* SUPPORT_PCRE8 */
868
869 #ifdef SUPPORT_PCRE16
870 static pcre16_jit_stack *stack16;
871
872 static pcre16_jit_stack *getstack16(void)
873 {
874 if (!stack16)
875 stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
876 return stack16;
877 }
878
879 static void setstack16(pcre16_extra *extra)
880 {
881 if (!extra) {
882 if (stack16)
883 pcre16_jit_stack_free(stack16);
884 stack16 = NULL;
885 return;
886 }
887
888 pcre16_assign_jit_stack(extra, callback16, getstack16());
889 }
890 #endif /* SUPPORT_PCRE16 */
891
892 #ifdef SUPPORT_PCRE32
893 static pcre32_jit_stack *stack32;
894
895 static pcre32_jit_stack *getstack32(void)
896 {
897 if (!stack32)
898 stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
899 return stack32;
900 }
901
902 static void setstack32(pcre32_extra *extra)
903 {
904 if (!extra) {
905 if (stack32)
906 pcre32_jit_stack_free(stack32);
907 stack32 = NULL;
908 return;
909 }
910
911 pcre32_assign_jit_stack(extra, callback32, getstack32());
912 }
913 #endif /* SUPPORT_PCRE32 */
914
915 #ifdef SUPPORT_PCRE16
916
917 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
918 {
919 unsigned char *iptr = (unsigned char*)input;
920 PCRE_UCHAR16 *optr = output;
921 unsigned int c;
922
923 if (max_length == 0)
924 return 0;
925
926 while (*iptr && max_length > 1) {
927 c = 0;
928 if (offsetmap)
929 *offsetmap++ = (int)(iptr - (unsigned char*)input);
930
931 if (!(*iptr & 0x80))
932 c = *iptr++;
933 else if (!(*iptr & 0x20)) {
934 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
935 iptr += 2;
936 } else if (!(*iptr & 0x10)) {
937 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
938 iptr += 3;
939 } else if (!(*iptr & 0x08)) {
940 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
941 iptr += 4;
942 }
943
944 if (c < 65536) {
945 *optr++ = c;
946 max_length--;
947 } else if (max_length <= 2) {
948 *optr = '\0';
949 return (int)(optr - output);
950 } else {
951 c -= 0x10000;
952 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
953 *optr++ = 0xdc00 | (c & 0x3ff);
954 max_length -= 2;
955 if (offsetmap)
956 offsetmap++;
957 }
958 }
959 if (offsetmap)
960 *offsetmap = (int)(iptr - (unsigned char*)input);
961 *optr = '\0';
962 return (int)(optr - output);
963 }
964
965 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
966 {
967 unsigned char *iptr = (unsigned char*)input;
968 PCRE_UCHAR16 *optr = output;
969
970 if (max_length == 0)
971 return 0;
972
973 while (*iptr && max_length > 1) {
974 *optr++ = *iptr++;
975 max_length--;
976 }
977 *optr = '\0';
978 return (int)(optr - output);
979 }
980
/* Capacity, in 16 bit code units, passed as max_length when patterns and
   subjects are converted for the 16 bit tests. */
981 #define REGTEST_MAX_LENGTH16 4096
/* Shared scratch buffer that receives the 16 bit form of the current pattern
   and, later, of the current subject string. */
982 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* Offset map filled while the subject is converted from UTF-8; the test
   driver uses it to translate 16 bit ovector entries back to byte offsets. */
983 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
984
985 #endif /* SUPPORT_PCRE16 */
986
987 #ifdef SUPPORT_PCRE32
988
989 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
990 {
991 unsigned char *iptr = (unsigned char*)input;
992 PCRE_UCHAR32 *optr = output;
993 unsigned int c;
994
995 if (max_length == 0)
996 return 0;
997
998 while (*iptr && max_length > 1) {
999 c = 0;
1000 if (offsetmap)
1001 *offsetmap++ = (int)(iptr - (unsigned char*)input);
1002
1003 if (!(*iptr & 0x80))
1004 c = *iptr++;
1005 else if (!(*iptr & 0x20)) {
1006 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1007 iptr += 2;
1008 } else if (!(*iptr & 0x10)) {
1009 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1010 iptr += 3;
1011 } else if (!(*iptr & 0x08)) {
1012 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1013 iptr += 4;
1014 }
1015
1016 *optr++ = c;
1017 max_length--;
1018 }
1019 if (offsetmap)
1020 *offsetmap = (int)(iptr - (unsigned char*)input);
1021 *optr = 0;
1022 return (int)(optr - output);
1023 }
1024
1025 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
1026 {
1027 unsigned char *iptr = (unsigned char*)input;
1028 PCRE_UCHAR32 *optr = output;
1029
1030 if (max_length == 0)
1031 return 0;
1032
1033 while (*iptr && max_length > 1) {
1034 *optr++ = *iptr++;
1035 max_length--;
1036 }
1037 *optr = '\0';
1038 return (int)(optr - output);
1039 }
1040
/* Capacity, in 32 bit code units, passed as max_length when patterns and
   subjects are converted for the 32 bit tests. */
1041 #define REGTEST_MAX_LENGTH32 4096
/* Shared scratch buffer that receives the 32 bit form of the current pattern
   and, later, of the current subject string. */
1042 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* Offset map filled while the subject is converted from UTF-8; the test
   driver uses it to translate 32 bit ovector entries back to byte offsets. */
1043 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1044
1045 #endif /* SUPPORT_PCRE32 */
1046
/* Returns 1 when every byte of the NUL terminated string "input" is in the
   range 0..127, and 0 as soon as a byte above 127 is found. An empty string
   yields 1. */
static int check_ascii(const char *input)
{
	const unsigned char *p;

	for (p = (const unsigned char *)input; *p != 0; p++) {
		if (*p >= 0x80)
			return 0;
	}

	return 1;
}
1057
1058 static int regression_tests(void)
1059 {
1060 struct regression_test_case *current = regression_test_cases;
1061 const char *error;
1062 char *cpu_info;
1063 int i, err_offs;
1064 int is_successful, is_ascii_pattern, is_ascii_input;
1065 int total = 0;
1066 int successful = 0;
1067 int successful_row = 0;
1068 int counter = 0;
1069 int study_mode;
1070 int utf = 0, ucp = 0;
1071 int disabled_flags = 0;
1072 #ifdef SUPPORT_PCRE8
1073 pcre *re8;
1074 pcre_extra *extra8;
1075 pcre_extra dummy_extra8;
1076 int ovector8_1[32];
1077 int ovector8_2[32];
1078 int return_value8[2];
1079 unsigned char *mark8_1, *mark8_2;
1080 #endif
1081 #ifdef SUPPORT_PCRE16
1082 pcre16 *re16;
1083 pcre16_extra *extra16;
1084 pcre16_extra dummy_extra16;
1085 int ovector16_1[32];
1086 int ovector16_2[32];
1087 int return_value16[2];
1088 PCRE_UCHAR16 *mark16_1, *mark16_2;
1089 int length16;
1090 #endif
1091 #ifdef SUPPORT_PCRE32
1092 pcre32 *re32;
1093 pcre32_extra *extra32;
1094 pcre32_extra dummy_extra32;
1095 int ovector32_1[32];
1096 int ovector32_2[32];
1097 int return_value32[2];
1098 PCRE_UCHAR32 *mark32_1, *mark32_2;
1099 int length32;
1100 #endif
1101
1102 /* This test compares the behaviour of interpreter and JIT. Although disabling
1103 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1104 still considered successful from pcre_jit_test point of view. */
1105
1106 #if defined SUPPORT_PCRE8
1107 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1108 #elif defined SUPPORT_PCRE16
1109 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1110 #elif defined SUPPORT_PCRE32
1111 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1112 #endif
1113
1114 printf("Running JIT regression tests\n");
1115 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1116
1117 #if defined SUPPORT_PCRE8
1118 pcre_config(PCRE_CONFIG_UTF8, &utf);
1119 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1120 #elif defined SUPPORT_PCRE16
1121 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1122 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1123 #elif defined SUPPORT_PCRE16
1124 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1125 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1126 #endif
1127
1128 if (!utf)
1129 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1130 if (!ucp)
1131 disabled_flags |= PCRE_UCP;
1132 #ifdef SUPPORT_PCRE8
1133 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1134 #endif
1135 #ifdef SUPPORT_PCRE16
1136 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1137 #endif
1138 #ifdef SUPPORT_PCRE32
1139 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1140 #endif
1141
1142 while (current->pattern) {
1143 /* printf("\nPattern: %s :\n", current->pattern); */
1144 total++;
1145 if (current->start_offset & F_PROPERTY) {
1146 is_ascii_pattern = 0;
1147 is_ascii_input = 0;
1148 } else {
1149 is_ascii_pattern = check_ascii(current->pattern);
1150 is_ascii_input = check_ascii(current->input);
1151 }
1152
1153 if (current->flags & PCRE_PARTIAL_SOFT)
1154 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1155 else if (current->flags & PCRE_PARTIAL_HARD)
1156 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1157 else
1158 study_mode = PCRE_STUDY_JIT_COMPILE;
1159 error = NULL;
1160 #ifdef SUPPORT_PCRE8
1161 re8 = NULL;
1162 if (!(current->start_offset & F_NO8))
1163 re8 = pcre_compile(current->pattern,
1164 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1165 &error, &err_offs, tables(0));
1166
1167 extra8 = NULL;
1168 if (re8) {
1169 error = NULL;
1170 extra8 = pcre_study(re8, study_mode, &error);
1171 if (!extra8) {
1172 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1173 pcre_free(re8);
1174 re8 = NULL;
1175 }
1176 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1177 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1178 pcre_free_study(extra8);
1179 pcre_free(re8);
1180 re8 = NULL;
1181 }
1182 extra8->flags |= PCRE_EXTRA_MARK;
1183 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1184 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1185 #endif
1186 #ifdef SUPPORT_PCRE16
1187 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1188 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1189 else
1190 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1191
1192 re16 = NULL;
1193 if (!(current->start_offset & F_NO16))
1194 re16 = pcre16_compile(regtest_buf16,
1195 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1196 &error, &err_offs, tables(0));
1197
1198 extra16 = NULL;
1199 if (re16) {
1200 error = NULL;
1201 extra16 = pcre16_study(re16, study_mode, &error);
1202 if (!extra16) {
1203 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1204 pcre16_free(re16);
1205 re16 = NULL;
1206 }
1207 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1208 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1209 pcre16_free_study(extra16);
1210 pcre16_free(re16);
1211 re16 = NULL;
1212 }
1213 extra16->flags |= PCRE_EXTRA_MARK;
1214 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1215 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1216 #endif
1217 #ifdef SUPPORT_PCRE32
1218 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1219 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1220 else
1221 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1222
1223 re32 = NULL;
1224 if (!(current->start_offset & F_NO32))
1225 re32 = pcre32_compile(regtest_buf32,
1226 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1227 &error, &err_offs, tables(0));
1228
1229 extra32 = NULL;
1230 if (re32) {
1231 error = NULL;
1232 extra32 = pcre32_study(re32, study_mode, &error);
1233 if (!extra32) {
1234 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1235 pcre32_free(re32);
1236 re32 = NULL;
1237 }
1238 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1239 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1240 pcre32_free_study(extra32);
1241 pcre32_free(re32);
1242 re32 = NULL;
1243 }
1244 extra32->flags |= PCRE_EXTRA_MARK;
1245 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1246 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1247 #endif
1248
1249 counter++;
1250 if ((counter & 0x3) != 0) {
1251 #ifdef SUPPORT_PCRE8
1252 setstack8(NULL);
1253 #endif
1254 #ifdef SUPPORT_PCRE16
1255 setstack16(NULL);
1256 #endif
1257 #ifdef SUPPORT_PCRE32
1258 setstack32(NULL);
1259 #endif
1260 }
1261
1262 #ifdef SUPPORT_PCRE8
1263 return_value8[0] = -1000;
1264 return_value8[1] = -1000;
1265 for (i = 0; i < 32; ++i)
1266 ovector8_1[i] = -2;
1267 for (i = 0; i < 32; ++i)
1268 ovector8_2[i] = -2;
1269 if (re8) {
1270 mark8_1 = NULL;
1271 mark8_2 = NULL;
1272 extra8->mark = &mark8_1;
1273
1274 if ((counter & 0x1) != 0) {
1275 setstack8(extra8);
1276 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1277 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1278 } else
1279 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1280 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
1281 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1282 dummy_extra8.flags = PCRE_EXTRA_MARK;
1283 if (current->start_offset & F_STUDY) {
1284 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1285 dummy_extra8.study_data = extra8->study_data;
1286 }
1287 dummy_extra8.mark = &mark8_2;
1288 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1289 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1290 }
1291 #endif
1292
1293 #ifdef SUPPORT_PCRE16
1294 return_value16[0] = -1000;
1295 return_value16[1] = -1000;
1296 for (i = 0; i < 32; ++i)
1297 ovector16_1[i] = -2;
1298 for (i = 0; i < 32; ++i)
1299 ovector16_2[i] = -2;
1300 if (re16) {
1301 mark16_1 = NULL;
1302 mark16_2 = NULL;
1303 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1304 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1305 else
1306 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1307 extra16->mark = &mark16_1;
1308 if ((counter & 0x1) != 0) {
1309 setstack16(extra16);
1310 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1311 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1312 } else
1313 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1314 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
1315 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1316 dummy_extra16.flags = PCRE_EXTRA_MARK;
1317 if (current->start_offset & F_STUDY) {
1318 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1319 dummy_extra16.study_data = extra16->study_data;
1320 }
1321 dummy_extra16.mark = &mark16_2;
1322 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1323 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1324 }
1325 #endif
1326
1327 #ifdef SUPPORT_PCRE32
1328 return_value32[0] = -1000;
1329 return_value32[1] = -1000;
1330 for (i = 0; i < 32; ++i)
1331 ovector32_1[i] = -2;
1332 for (i = 0; i < 32; ++i)
1333 ovector32_2[i] = -2;
1334 if (re32) {
1335 mark32_1 = NULL;
1336 mark32_2 = NULL;
1337 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1338 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1339 else
1340 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1341 extra32->mark = &mark32_1;
1342 if ((counter & 0x1) != 0) {
1343 setstack32(extra32);
1344 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1345 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1346 } else
1347 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1348 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
1349 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1350 dummy_extra32.flags = PCRE_EXTRA_MARK;
1351 if (current->start_offset & F_STUDY) {
1352 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1353 dummy_extra32.study_data = extra32->study_data;
1354 }
1355 dummy_extra32.mark = &mark32_2;
1356 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1357 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1358 }
1359 #endif
1360
1361 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1362 return_value8[0], return_value16[0],
1363 ovector8_1[0], ovector8_1[1],
1364 ovector16_1[0], ovector16_1[1],
1365 ovector32_1[0], ovector32_1[1],
1366 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1367
1368 /* If F_DIFF is set, just run the test, but do not compare the results.
1369 Segfaults can still be captured. */
1370
1371 is_successful = 1;
1372 if (!(current->start_offset & F_DIFF)) {
1373 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1374 if (!(current->start_offset & F_FORCECONV)) {
1375 int return_value;
1376
1377 /* All results must be the same. */
1378 #ifdef SUPPORT_PCRE8
1379 if ((return_value = return_value8[0]) != return_value8[1]) {
1380 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1381 return_value8[0], return_value8[1], total, current->pattern, current->input);
1382 is_successful = 0;
1383 } else
1384 #endif
1385 #ifdef SUPPORT_PCRE16
1386 if ((return_value = return_value16[0]) != return_value16[1]) {
1387 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1388 return_value16[0], return_value16[1], total, current->pattern, current->input);
1389 is_successful = 0;
1390 } else
1391 #endif
1392 #ifdef SUPPORT_PCRE32
1393 if ((return_value = return_value32[0]) != return_value32[1]) {
1394 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1395 return_value32[0], return_value32[1], total, current->pattern, current->input);
1396 is_successful = 0;
1397 } else
1398 #endif
1399 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1400 if (return_value8[0] != return_value16[0]) {
1401 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1402 return_value8[0], return_value16[0],
1403 total, current->pattern, current->input);
1404 is_successful = 0;
1405 } else
1406 #endif
1407 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1408 if (return_value8[0] != return_value32[0]) {
1409 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1410 return_value8[0], return_value32[0],
1411 total, current->pattern, current->input);
1412 is_successful = 0;
1413 } else
1414 #endif
1415 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1416 if (return_value16[0] != return_value32[0]) {
1417 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1418 return_value16[0], return_value32[0],
1419 total, current->pattern, current->input);
1420 is_successful = 0;
1421 } else
1422 #endif
1423 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1424 if (return_value == PCRE_ERROR_PARTIAL) {
1425 return_value = 2;
1426 } else {
1427 return_value *= 2;
1428 }
1429 #ifdef SUPPORT_PCRE8
1430 return_value8[0] = return_value;
1431 #endif
1432 #ifdef SUPPORT_PCRE16
1433 return_value16[0] = return_value;
1434 #endif
1435 #ifdef SUPPORT_PCRE32
1436 return_value32[0] = return_value;
1437 #endif
1438 /* Transform back the results. */
1439 if (current->flags & PCRE_UTF8) {
1440 #ifdef SUPPORT_PCRE16
1441 for (i = 0; i < return_value; ++i) {
1442 if (ovector16_1[i] >= 0)
1443 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1444 if (ovector16_2[i] >= 0)
1445 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1446 }
1447 #endif
1448 #ifdef SUPPORT_PCRE32
1449 for (i = 0; i < return_value; ++i) {
1450 if (ovector32_1[i] >= 0)
1451 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1452 if (ovector32_2[i] >= 0)
1453 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1454 }
1455 #endif
1456 }
1457
1458 for (i = 0; i < return_value; ++i) {
1459 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1460 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1461 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1462 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1463 total, current->pattern, current->input);
1464 is_successful = 0;
1465 }
1466 #endif
1467 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1468 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1469 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1470 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1471 total, current->pattern, current->input);
1472 is_successful = 0;
1473 }
1474 #endif
1475 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1476 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1477 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1478 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1479 total, current->pattern, current->input);
1480 is_successful = 0;
1481 }
1482 #endif
1483 }
1484 }
1485 } else
1486 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1487 {
1488 /* Widths are not compared with each other here: for each enabled width (8, 16, 32 bit), only its own two results (JIT and interpreter) must be equal. */
1489 #ifdef SUPPORT_PCRE8
1490 if (return_value8[0] != return_value8[1]) {
1491 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1492 return_value8[0], return_value8[1], total, current->pattern, current->input);
1493 is_successful = 0;
1494 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1495 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1496 return_value8[0] = 2;
1497 else
1498 return_value8[0] *= 2;
1499
1500 for (i = 0; i < return_value8[0]; ++i)
1501 if (ovector8_1[i] != ovector8_2[i]) {
1502 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1503 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1504 is_successful = 0;
1505 }
1506 }
1507 #endif
1508
1509 #ifdef SUPPORT_PCRE16
1510 if (return_value16[0] != return_value16[1]) {
1511 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1512 return_value16[0], return_value16[1], total, current->pattern, current->input);
1513 is_successful = 0;
1514 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1515 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1516 return_value16[0] = 2;
1517 else
1518 return_value16[0] *= 2;
1519
1520 for (i = 0; i < return_value16[0]; ++i)
1521 if (ovector16_1[i] != ovector16_2[i]) {
1522 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1523 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1524 is_successful = 0;
1525 }
1526 }
1527 #endif
1528
1529 #ifdef SUPPORT_PCRE32
1530 if (return_value32[0] != return_value32[1]) {
1531 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1532 return_value32[0], return_value32[1], total, current->pattern, current->input);
1533 is_successful = 0;
1534 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1535 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1536 return_value32[0] = 2;
1537 else
1538 return_value32[0] *= 2;
1539
1540 for (i = 0; i < return_value32[0]; ++i)
1541 if (ovector32_1[i] != ovector32_2[i]) {
1542 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1543 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1544 is_successful = 0;
1545 }
1546 }
1547 #endif
1548 }
1549 }
1550
1551 if (is_successful) {
1552 #ifdef SUPPORT_PCRE8
1553 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1554 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1555 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1556 total, current->pattern, current->input);
1557 is_successful = 0;
1558 }
1559
1560 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1561 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1562 total, current->pattern, current->input);
1563 is_successful = 0;
1564 }
1565 }
1566 #endif
1567 #ifdef SUPPORT_PCRE16
1568 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1569 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1570 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1571 total, current->pattern, current->input);
1572 is_successful = 0;
1573 }
1574
1575 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1576 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1577 total, current->pattern, current->input);
1578 is_successful = 0;
1579 }
1580 }
1581 #endif
1582 #ifdef SUPPORT_PCRE32
1583 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1584 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1585 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1586 total, current->pattern, current->input);
1587 is_successful = 0;
1588 }
1589
1590 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1591 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1592 total, current->pattern, current->input);
1593 is_successful = 0;
1594 }
1595 }
1596 #endif
1597 }
1598
1599 if (is_successful) {
1600 #ifdef SUPPORT_PCRE8
1601 if (mark8_1 != mark8_2) {
1602 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1603 total, current->pattern, current->input);
1604 is_successful = 0;
1605 }
1606 #endif
1607 #ifdef SUPPORT_PCRE16
1608 if (mark16_1 != mark16_2) {
1609 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1610 total, current->pattern, current->input);
1611 is_successful = 0;
1612 }
1613 #endif
1614 #ifdef SUPPORT_PCRE32
1615 if (mark32_1 != mark32_2) {
1616 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1617 total, current->pattern, current->input);
1618 is_successful = 0;
1619 }
1620 #endif
1621 }
1622
1623 #ifdef SUPPORT_PCRE8
1624 if (re8) {
1625 pcre_free_study(extra8);
1626 pcre_free(re8);
1627 }
1628 #endif
1629 #ifdef SUPPORT_PCRE16
1630 if (re16) {
1631 pcre16_free_study(extra16);
1632 pcre16_free(re16);
1633 }
1634 #endif
1635 #ifdef SUPPORT_PCRE32
1636 if (re32) {
1637 pcre32_free_study(extra32);
1638 pcre32_free(re32);
1639 }
1640 #endif
1641
1642 if (is_successful) {
1643 successful++;
1644 successful_row++;
1645 printf(".");
1646 if (successful_row >= 60) {
1647 successful_row = 0;
1648 printf("\n");
1649 }
1650 } else
1651 successful_row = 0;
1652
1653 fflush(stdout);
1654 current++;
1655 }
1656 tables(1);
1657 #ifdef SUPPORT_PCRE8
1658 setstack8(NULL);
1659 #endif
1660 #ifdef SUPPORT_PCRE16
1661 setstack16(NULL);
1662 #endif
1663 #ifdef SUPPORT_PCRE32
1664 setstack32(NULL);
1665 #endif
1666
1667 if (total == successful) {
1668 printf("\nAll JIT regression tests are successfully passed.\n");
1669 return 0;
1670 } else {
1671 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1672 return 1;
1673 }
1674 }
1675
1676 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5