/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1306 - (show annotations)
Mon Apr 1 17:04:17 2013 UTC (6 years, 5 months ago) by zherczeg
File MIME type: text/plain
File size: 68632 byte(s)
Error occurred while calculating annotation data.
Auto-detect and optimize limited repetitions in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
54 #define PCRE_BUG 0x80000000
55
56 /*
57 Letter characters:
58 \xe6\x92\xad = 0x64ad = 25773 (kanji)
59 Non-letter characters:
60 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
61 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64 Newlines:
65 \xc2\x85 = 0x85 = 133 (Next Line = NEL)
66 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67 Othercase pairs:
68 \xc3\xa9 = 0xe9 = 233 (e')
69 \xc3\x89 = 0xc9 = 201 (E')
70 \xc3\xa1 = 0xe1 = 225 (a')
71 \xc3\x81 = 0xc1 = 193 (A')
72 \xc8\xba = 0x23a = 570
73 \xe2\xb1\xa5 = 0x2c65 = 11365
74 \xe1\xbd\xb8 = 0x1f78 = 8056
75 \xe1\xbf\xb8 = 0x1ff8 = 8184
76 \xf0\x90\x90\x80 = 0x10400 = 66560
77 \xf0\x90\x90\xa8 = 0x10428 = 66600
78 Mark property:
79 \xcc\x8d = 0x30d = 781
80 Special:
81 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86 */
87
/* Runs the regression test cases; defined elsewhere in this file. */
static int regression_tests(void);

/*
 * Program entry point.
 *
 * Queries PCRE_CONFIG_JIT through the config function of the first library
 * width that is enabled at build time (8-bit, then 16-bit, then 32-bit).
 * If the query leaves `jit` at zero, a message is printed and the program
 * exits with status 1. Otherwise the exit status is whatever
 * regression_tests() returns.
 */
int main(void)
{
	/* Stays 0 unless the selected config call reports JIT support. */
	int jit = 0;

#if defined SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE16
	pcre16_config(PCRE_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE32
	pcre32_config(PCRE_CONFIG_JIT, &jit);
#endif

	if (!jit) {
		printf("JIT must be enabled to run pcre_jit_test\n");
		return 1;
	}

	return regression_tests();
}
106
107 /* --------------------------------------------------------------------------------------- */
108
/*
 * Build-configuration check: at least one of the 8-bit, 16-bit or 32-bit
 * library variants must be enabled, otherwise compilation stops here.
 */
#if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
#error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
#endif
112
/*
 * Shorthand combinations of PCRE compile options used in the test table.
 * Letters in the names: C = PCRE_CASELESS, M = PCRE_MULTILINE,
 * U = PCRE_UTF8, A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
 */
#define MUA	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define MUAP	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define CMUAP	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)

/*
 * Test-control bits. The test table ORs these into the start_offset field
 * of struct regression_test_case; OFFSET_MASK selects the low 16 bits, which
 * carry the numeric offset, and every F_* bit lies above that mask.
 *
 * The per-flag meanings below are inferred from the names and from how the
 * table entries use them -- TODO confirm against the code that consumes them.
 */
#define OFFSET_MASK	0x00ffff
#define F_NO8		0x010000	/* presumably: skip in the 8-bit run */
#define F_NO16		0x020000	/* presumably: skip in the 16-bit run */
/* NOTE(review): F_NO32 has the same value as F_NO16, so the two flags cannot
   be told apart -- confirm this is intentional before relying on either. */
#define F_NO32		0x020000
#define F_NOMATCH	0x040000	/* used on cases where no match is expected */
#define F_DIFF		0x080000
#define F_FORCECONV	0x100000
#define F_PROPERTY	0x200000	/* used on cases with \p, \P or \X patterns */
#define F_STUDY		0x400000
130
/*
 * One entry of the regression test table.
 *
 * Entries are written with positional initializers, so the member order
 * below must not change.
 */
struct regression_test_case {
	/* PCRE option bits for the case (the table passes PCRE_* options or
	   the MUA/CMA/... shorthands here). */
	int flags;
	/* Start offset in the low 16 bits (OFFSET_MASK), OR'ed with the F_*
	   test-control bits. */
	int start_offset;
	/* Regular expression under test. */
	const char *pattern;
	/* Subject string the pattern is run against. */
	const char *input;
};
137
138 static struct regression_test_case regression_test_cases[] = {
139 /* Constant strings. */
140 { MUA, 0, "AbC", "AbAbC" },
141 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
142 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
143 { MA, 0, "[^a]", "aAbB" },
144 { CMA, 0, "[^m]", "mMnN" },
145 { MA, 0, "a[^b][^#]", "abacd" },
146 { CMA, 0, "A[^B][^E]", "abacd" },
147 { CMUA, 0, "[^x][^#]", "XxBll" },
148 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
149 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
150 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
151 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
152 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
153 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
154 { MUA, 0, "[axd]", "sAXd" },
155 { CMUA, 0, "[axd]", "sAXd" },
156 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
157 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
158 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
159 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
161 { MUA, 0, "[^a]", "\xc2\x80[]" },
162 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
163 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
164 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
165 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
166 { PCRE_CASELESS, 0, "a1", "Aa1" },
167 { MA, 0, "\\Ca", "cda" },
168 { CMA, 0, "\\Ca", "CDA" },
169 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
170 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
171 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
174 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
175
176 /* Assertions. */
177 { MUA, 0, "\\b[^A]", "A_B#" },
178 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
179 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
180 { MAP, 0, "\\B", "_\xa1" },
181 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
182 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
183 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
184 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
185 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
186 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
187 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
188 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
189 { MA, 1 | F_NOMATCH, "^", "\n" },
190 { 0, 0, "^ab", "ab" },
191 { 0, 0 | F_NOMATCH, "^ab", "aab" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
197 { 0, 0, "ab$", "ab" },
198 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
199 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
200 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
201 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
202 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
203 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
204 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
205 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
206 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
207 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
208 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
209 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
210 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
211 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
212 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
213 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
214 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
215 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
218 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
219 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
220 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
225 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
226 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
227 { MA, 0, "\\Aa", "aaa" },
228 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
229 { MA, 1, "\\Ga", "aaa" },
230 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
231 { MA, 0, "a\\z", "aaa" },
232 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
233
234 /* Brackets. */
235 { MUA, 0, "(ab|bb|cd)", "bacde" },
236 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
237 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
238 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
239 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
240 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
241
242 /* Greedy and non-greedy ? operators. */
243 { MUA, 0, "(?:a)?a", "laab" },
244 { CMUA, 0, "(A)?A", "llaab" },
245 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
246 { MUA, 0, "(a)?a", "manm" },
247 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
248 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
249 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
250
251 /* Greedy and non-greedy + operators */
252 { MUA, 0, "(aa)+aa", "aaaaaaa" },
253 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
254 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
255 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
256 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
258 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
259
260 /* Greedy and non-greedy * operators */
261 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
262 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
263 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
264 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
265 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
266 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
267 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
268 { MA, 0, "((?:a|)*){0}a", "a" },
269
270 /* Combining ? + * operators */
271 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
272 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
274 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
275 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
276
277 /* Single character iterators. */
278 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
279 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
280 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
281 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
282 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
283 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
284 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
285 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
286 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
287 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
288 { MUA, 0, "(a?+[^b])+", "babaacacb" },
289 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
290 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
291 { CMUA, 0, "[c-f]+k", "DemmFke" },
292 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
293 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
294 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
295 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
296 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
297 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
298 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
299 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
300 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
301 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
302 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
303 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
304 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
305 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
306 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
307 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
309 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
310
311 /* Bracket repeats with limit. */
312 { MUA, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
313 { MUA, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
314 { MUA, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
315 { MUA, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
316 { MUA, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
317 { MUA, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
318 { MUA, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
319 { MUA, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
320 { MUA, 0, "(ab){4,6}?M", "abababababababM" },
321
322 /* Basic character sets. */
323 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
324 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
325 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
326 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
327 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
328 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
329
330 /* Unicode properties. */
331 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
332 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
333 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
334 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
335 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
336 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
337 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
338 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
339 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
340 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
341 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
342 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
343 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
344 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
345 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
346 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
347 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
348 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
349 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
350 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
351
352 /* Possible empty brackets. */
353 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
354 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
355 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
356 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
357 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
358 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
359 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
360 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
361 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
362 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
363
364 /* Start offset. */
365 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
366 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
367 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
368 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
369
370 /* Newline. */
371 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
372 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
373 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
374
375 /* Any character except newline or any newline. */
376 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
377 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
378 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
379 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
380 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
381 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
382 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
383 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
384 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
385 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
386 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
387 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
388 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
389 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
390 { MUA, 0, "\\R+", "ab\r\n\r" },
391 { MUA, 0, "\\R*", "ab\r\n\r" },
392 { MUA, 0, "\\R*", "\r\n\r" },
393 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
394 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
395 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
396 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
397 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
398 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
399 { MUA, 0, "\\R*\\R\\R", "\n\r" },
400 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
401 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
402
403 /* Atomic groups (no fallback from "next" direction). */
404 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
405 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
406 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
407 "bababcdedefgheijijklmlmnop" },
408 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
409 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
410 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
411 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
412 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
413 { MUA, 0, "(?>x|)*$", "aaa" },
414 { MUA, 0, "(?>(x)|)*$", "aaa" },
415 { MUA, 0, "(?>x|())*$", "aaa" },
416 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
417 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
418 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
419 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
420 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
421 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
422 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
423 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
424 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
425 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
426 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
427 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
428 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
429 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
430 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
431 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
432 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
433 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
434 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
435 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
436 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
437 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
438 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
439 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
440 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
441 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
442
443 /* Possessive quantifiers. */
444 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
445 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
446 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
447 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
448 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
449 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
450 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
451 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
452 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
453 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
454 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
455 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
456 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
457 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
458 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
459 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
460 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
461 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
462 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
463 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
464 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
465 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
466 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
467 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
468 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
469 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
470 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
471 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
472 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
473 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
474 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
475 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
476 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
477 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
478 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
479
480 /* Back references. */
481 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
482 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
483 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
484 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
485 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
486 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
487 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
488 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
489 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
490 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
491 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
492 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
493 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
494 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
495 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
496 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
497 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
498 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
499 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
500 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
501 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
502 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
503 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
504
505 /* Assertions. */
506 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
507 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
508 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
509 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
510 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
511 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
512 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
513 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
514 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
515 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
516 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
517 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
518 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
519 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
520 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
521 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
522 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
523 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
524 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
525 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
526 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
527 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
528 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
529 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
530
531 /* Not empty, ACCEPT, FAIL */
532 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
533 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
534 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
535 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
536 { MUA, 0, "a(*ACCEPT)b", "ab" },
537 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
538 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
539 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
540 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
541 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
542 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
543 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
544 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
545 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
546 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
547 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
548 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
549 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
550 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
551
552 /* Conditional blocks. */
553 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
554 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
555 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
556 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
557 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
558 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
559 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
560 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
561 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
562 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
563 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
564 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
565 { MUA, 0, "(?(?=a)ab)", "a" },
566 { MUA, 0, "(?(?<!b)c)", "b" },
567 { MUA, 0, "(?(DEFINE)a(b))", "a" },
568 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
569 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
570 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
571 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
572 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
573 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
574 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
575 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
576 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
577 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
578 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
579 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
580 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
581 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
582 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
583 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
584 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
585 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
586 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
587 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
588
589 /* Set start of match. */
590 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
591 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
592 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
593 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
594 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
595
596 /* First line. */
597 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
598 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
599 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
600 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
601 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
602 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
603 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
604 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
605 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
606 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
607 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
608 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
609 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
610 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
611 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
612 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
613 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
614 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
615 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
616
617 /* Recurse. */
618 { MUA, 0, "(a)(?1)", "aa" },
619 { MUA, 0, "((a))(?1)", "aa" },
620 { MUA, 0, "(b|a)(?1)", "aa" },
621 { MUA, 0, "(b|(a))(?1)", "aa" },
622 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
623 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
624 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
625 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
626 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
627 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
628 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
629 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
630 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
631 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
632 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
633 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
634 { MUA, 0, "b|<(?R)*>", "<<b>" },
635 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
636 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
637 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
638 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
639 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
640 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
641 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
642 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
643
644 /* 16 bit specific tests. */
645 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
646 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
647 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
648 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
649 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
650 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
651 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
652 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
653 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
654 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
655 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
656 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
657 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
658 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
659 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
660 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
661 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
662 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
663 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
664 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
665 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
666 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
667 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
668 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
669 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
670 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
671 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
672 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
673 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
674 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
675 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
676
677 /* Partial matching. */
678 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
679 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
680 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
681 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
682 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
683 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
684 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
685 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
686
687 /* (*MARK) verb. */
688 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
689 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
690 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
691 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
692 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
693 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
694 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
695 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
696 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
697 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
698 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
699 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
700 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
701 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
702 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
703
704 /* (*COMMIT) verb. */
705 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
706 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
707 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
708 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
709 { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
710 { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
711
712 /* (*PRUNE) verb. */
713 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
714 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
715 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
716 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
717 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
718 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
719 { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
720 { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
721 { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
722 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
723 { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
724 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
725 { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
726 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
727 { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
728 { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
729 { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
730 { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
731 { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
732 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
733 { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
734 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
735 { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
736 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
737 { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
738 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
739 { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
740 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
741 { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
742 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
743
744 /* (*SKIP) verb. */
745 { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
746
747 /* (*THEN) verb. */
748 { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
749 { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
750 { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
751 { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
752 { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
753 { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
754 { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
755 { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
756 { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
757
758 /* Deep recursion. */
759 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
760 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
761 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
762
763 /* Deep recursion: Stack limit reached. */
764 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
765 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
766 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
767 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
768 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
769
770 { 0, 0, NULL, NULL }
771 };
772
/* Hands out PCRE's default character tables as a private heap copy. The
   original comment gives the reason: the copy lets valgrind report invalid
   reads and writes on the tables.

   mode != 0: release the cached copy (if any) and return NULL.
   mode == 0: return the cached copy, creating it on the first call.

   NULL is also returned when the default tables cannot be obtained or when
   the allocation fails. */
static const unsigned char *tables(int mode)
{
	/* The table size cannot be queried through pcre_fullinfo, so it is
	   hard-coded. Since this is a test program, that is acceptable for now. */
	enum { TABLES_COPY_SIZE = 1088 };
	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables = NULL;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode != 0) {
		/* free(NULL) is a no-op, so no guard is needed here. */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy != NULL)
		return tables_copy;

	/* Compile an empty pattern with the first available library only to
	   ask it for the address of the default tables. */
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre32_free(regex);
	}
#endif

	/* Shouldn't ever happen. */
	if (default_tables == NULL)
		return NULL;

	tables_copy = (unsigned char *)malloc(TABLES_COPY_SIZE);
	if (tables_copy != NULL)
		memcpy(tables_copy, default_tables, TABLES_COPY_SIZE);
	return tables_copy;
}
835
#ifdef SUPPORT_PCRE8
/* Callback handed to pcre_assign_jit_stack for the 8 bit library: the opaque
   argument already is the JIT stack, so it is returned unchanged. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *jit_stack = (pcre_jit_stack *)arg;

	return jit_stack;
}
#endif
842
#ifdef SUPPORT_PCRE16
/* Callback handed to pcre16_assign_jit_stack for the 16 bit library: the
   opaque argument already is the JIT stack, so it is returned unchanged. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *jit_stack = (pcre16_jit_stack *)arg;

	return jit_stack;
}
#endif
849
#ifdef SUPPORT_PCRE32
/* Callback handed to pcre32_assign_jit_stack for the 32 bit library: the
   opaque argument already is the JIT stack, so it is returned unchanged. */
static pcre32_jit_stack* callback32(void *arg)
{
	pcre32_jit_stack *jit_stack = (pcre32_jit_stack *)arg;

	return jit_stack;
}
#endif
856
#ifdef SUPPORT_PCRE8
/* JIT stack shared by the 8 bit tests; NULL while no stack is allocated. */
static pcre_jit_stack *stack8;

/* Returns the shared 8 bit JIT stack, allocating it on first use. */
static pcre_jit_stack *getstack8(void)
{
	if (stack8 == NULL)
		stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
	return stack8;
}

/* With a non-NULL extra: registers callback8 together with the shared stack
   on that study data. With a NULL extra: frees the shared stack, so the next
   getstack8() call allocates a fresh one. */
static void setstack8(pcre_extra *extra)
{
	if (extra != NULL) {
		pcre_assign_jit_stack(extra, callback8, getstack8());
		return;
	}

	if (stack8 != NULL)
		pcre_jit_stack_free(stack8);
	stack8 = NULL;
}
#endif /* SUPPORT_PCRE8 */
879
#ifdef SUPPORT_PCRE16
/* JIT stack shared by the 16 bit tests; NULL while no stack is allocated. */
static pcre16_jit_stack *stack16;

/* Returns the shared 16 bit JIT stack, allocating it on first use. */
static pcre16_jit_stack *getstack16(void)
{
	if (stack16 == NULL)
		stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
	return stack16;
}

/* With a non-NULL extra: registers callback16 together with the shared stack
   on that study data. With a NULL extra: frees the shared stack, so the next
   getstack16() call allocates a fresh one. */
static void setstack16(pcre16_extra *extra)
{
	if (extra != NULL) {
		pcre16_assign_jit_stack(extra, callback16, getstack16());
		return;
	}

	if (stack16 != NULL)
		pcre16_jit_stack_free(stack16);
	stack16 = NULL;
}
#endif /* SUPPORT_PCRE16 */
902
#ifdef SUPPORT_PCRE32
/* JIT stack shared by the 32 bit tests; NULL while no stack is allocated. */
static pcre32_jit_stack *stack32;

/* Returns the shared 32 bit JIT stack, allocating it on first use. */
static pcre32_jit_stack *getstack32(void)
{
	if (stack32 == NULL)
		stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
	return stack32;
}

/* With a non-NULL extra: registers callback32 together with the shared stack
   on that study data. With a NULL extra: frees the shared stack, so the next
   getstack32() call allocates a fresh one. */
static void setstack32(pcre32_extra *extra)
{
	if (extra != NULL) {
		pcre32_assign_jit_stack(extra, callback32, getstack32());
		return;
	}

	if (stack32 != NULL)
		pcre32_jit_stack_free(stack32);
	stack32 = NULL;
}
#endif /* SUPPORT_PCRE32 */
925
926 #ifdef SUPPORT_PCRE16
927
928 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
929 {
930 unsigned char *iptr = (unsigned char*)input;
931 PCRE_UCHAR16 *optr = output;
932 unsigned int c;
933
934 if (max_length == 0)
935 return 0;
936
937 while (*iptr && max_length > 1) {
938 c = 0;
939 if (offsetmap)
940 *offsetmap++ = (int)(iptr - (unsigned char*)input);
941
942 if (!(*iptr & 0x80))
943 c = *iptr++;
944 else if (!(*iptr & 0x20)) {
945 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
946 iptr += 2;
947 } else if (!(*iptr & 0x10)) {
948 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
949 iptr += 3;
950 } else if (!(*iptr & 0x08)) {
951 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
952 iptr += 4;
953 }
954
955 if (c < 65536) {
956 *optr++ = c;
957 max_length--;
958 } else if (max_length <= 2) {
959 *optr = '\0';
960 return (int)(optr - output);
961 } else {
962 c -= 0x10000;
963 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
964 *optr++ = 0xdc00 | (c & 0x3ff);
965 max_length -= 2;
966 if (offsetmap)
967 offsetmap++;
968 }
969 }
970 if (offsetmap)
971 *offsetmap = (int)(iptr - (unsigned char*)input);
972 *optr = '\0';
973 return (int)(optr - output);
974 }
975
976 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
977 {
978 unsigned char *iptr = (unsigned char*)input;
979 PCRE_UCHAR16 *optr = output;
980
981 if (max_length == 0)
982 return 0;
983
984 while (*iptr && max_length > 1) {
985 *optr++ = *iptr++;
986 max_length--;
987 }
988 *optr = '\0';
989 return (int)(optr - output);
990 }
991
992 #define REGTEST_MAX_LENGTH16 4096
993 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
994 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
995
996 #endif /* SUPPORT_PCRE16 */
997
998 #ifdef SUPPORT_PCRE32
999
1000 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
1001 {
1002 unsigned char *iptr = (unsigned char*)input;
1003 PCRE_UCHAR32 *optr = output;
1004 unsigned int c;
1005
1006 if (max_length == 0)
1007 return 0;
1008
1009 while (*iptr && max_length > 1) {
1010 c = 0;
1011 if (offsetmap)
1012 *offsetmap++ = (int)(iptr - (unsigned char*)input);
1013
1014 if (!(*iptr & 0x80))
1015 c = *iptr++;
1016 else if (!(*iptr & 0x20)) {
1017 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1018 iptr += 2;
1019 } else if (!(*iptr & 0x10)) {
1020 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1021 iptr += 3;
1022 } else if (!(*iptr & 0x08)) {
1023 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1024 iptr += 4;
1025 }
1026
1027 *optr++ = c;
1028 max_length--;
1029 }
1030 if (offsetmap)
1031 *offsetmap = (int)(iptr - (unsigned char*)input);
1032 *optr = 0;
1033 return (int)(optr - output);
1034 }
1035
1036 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
1037 {
1038 unsigned char *iptr = (unsigned char*)input;
1039 PCRE_UCHAR32 *optr = output;
1040
1041 if (max_length == 0)
1042 return 0;
1043
1044 while (*iptr && max_length > 1) {
1045 *optr++ = *iptr++;
1046 max_length--;
1047 }
1048 *optr = '\0';
1049 return (int)(optr - output);
1050 }
1051
1052 #define REGTEST_MAX_LENGTH32 4096
1053 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1054 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1055
1056 #endif /* SUPPORT_PCRE32 */
1057
/* Returns 1 when every byte of the zero terminated string `input` is at most
   127 (an empty string qualifies), and 0 as soon as a byte above 127 is
   found. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 0x80)
			return 0;
	}
	return 1;
}
1068
1069 static int regression_tests(void)
1070 {
1071 struct regression_test_case *current = regression_test_cases;
1072 const char *error;
1073 char *cpu_info;
1074 int i, err_offs;
1075 int is_successful, is_ascii_pattern, is_ascii_input;
1076 int total = 0;
1077 int successful = 0;
1078 int successful_row = 0;
1079 int counter = 0;
1080 int study_mode;
1081 int utf = 0, ucp = 0;
1082 int disabled_flags = 0;
1083 #ifdef SUPPORT_PCRE8
1084 pcre *re8;
1085 pcre_extra *extra8;
1086 pcre_extra dummy_extra8;
1087 int ovector8_1[32];
1088 int ovector8_2[32];
1089 int return_value8[2];
1090 unsigned char *mark8_1, *mark8_2;
1091 #endif
1092 #ifdef SUPPORT_PCRE16
1093 pcre16 *re16;
1094 pcre16_extra *extra16;
1095 pcre16_extra dummy_extra16;
1096 int ovector16_1[32];
1097 int ovector16_2[32];
1098 int return_value16[2];
1099 PCRE_UCHAR16 *mark16_1, *mark16_2;
1100 int length16;
1101 #endif
1102 #ifdef SUPPORT_PCRE32
1103 pcre32 *re32;
1104 pcre32_extra *extra32;
1105 pcre32_extra dummy_extra32;
1106 int ovector32_1[32];
1107 int ovector32_2[32];
1108 int return_value32[2];
1109 PCRE_UCHAR32 *mark32_1, *mark32_2;
1110 int length32;
1111 #endif
1112
1113 /* This test compares the behaviour of interpreter and JIT. Although disabling
1114 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1115 still considered successful from pcre_jit_test point of view. */
1116
1117 #if defined SUPPORT_PCRE8
1118 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1119 #elif defined SUPPORT_PCRE16
1120 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1121 #elif defined SUPPORT_PCRE32
1122 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1123 #endif
1124
1125 printf("Running JIT regression tests\n");
1126 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1127
1128 #if defined SUPPORT_PCRE8
1129 pcre_config(PCRE_CONFIG_UTF8, &utf);
1130 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1131 #elif defined SUPPORT_PCRE16
1132 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1133 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1134 #elif defined SUPPORT_PCRE16
1135 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1136 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1137 #endif
1138
1139 if (!utf)
1140 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1141 if (!ucp)
1142 disabled_flags |= PCRE_UCP;
1143 #ifdef SUPPORT_PCRE8
1144 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1145 #endif
1146 #ifdef SUPPORT_PCRE16
1147 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1148 #endif
1149 #ifdef SUPPORT_PCRE32
1150 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1151 #endif
1152
1153 while (current->pattern) {
1154 /* printf("\nPattern: %s :\n", current->pattern); */
1155 total++;
1156 if (current->start_offset & F_PROPERTY) {
1157 is_ascii_pattern = 0;
1158 is_ascii_input = 0;
1159 } else {
1160 is_ascii_pattern = check_ascii(current->pattern);
1161 is_ascii_input = check_ascii(current->input);
1162 }
1163
1164 if (current->flags & PCRE_PARTIAL_SOFT)
1165 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1166 else if (current->flags & PCRE_PARTIAL_HARD)
1167 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1168 else
1169 study_mode = PCRE_STUDY_JIT_COMPILE;
1170 error = NULL;
1171 #ifdef SUPPORT_PCRE8
1172 re8 = NULL;
1173 if (!(current->start_offset & F_NO8))
1174 re8 = pcre_compile(current->pattern,
1175 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1176 &error, &err_offs, tables(0));
1177
1178 extra8 = NULL;
1179 if (re8) {
1180 error = NULL;
1181 extra8 = pcre_study(re8, study_mode, &error);
1182 if (!extra8) {
1183 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1184 pcre_free(re8);
1185 re8 = NULL;
1186 }
1187 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1188 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1189 pcre_free_study(extra8);
1190 pcre_free(re8);
1191 re8 = NULL;
1192 }
1193 extra8->flags |= PCRE_EXTRA_MARK;
1194 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1195 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1196 #endif
1197 #ifdef SUPPORT_PCRE16
1198 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1199 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1200 else
1201 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1202
1203 re16 = NULL;
1204 if (!(current->start_offset & F_NO16))
1205 re16 = pcre16_compile(regtest_buf16,
1206 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1207 &error, &err_offs, tables(0));
1208
1209 extra16 = NULL;
1210 if (re16) {
1211 error = NULL;
1212 extra16 = pcre16_study(re16, study_mode, &error);
1213 if (!extra16) {
1214 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1215 pcre16_free(re16);
1216 re16 = NULL;
1217 }
1218 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1219 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1220 pcre16_free_study(extra16);
1221 pcre16_free(re16);
1222 re16 = NULL;
1223 }
1224 extra16->flags |= PCRE_EXTRA_MARK;
1225 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1226 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1227 #endif
1228 #ifdef SUPPORT_PCRE32
1229 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1230 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1231 else
1232 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1233
1234 re32 = NULL;
1235 if (!(current->start_offset & F_NO32))
1236 re32 = pcre32_compile(regtest_buf32,
1237 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1238 &error, &err_offs, tables(0));
1239
1240 extra32 = NULL;
1241 if (re32) {
1242 error = NULL;
1243 extra32 = pcre32_study(re32, study_mode, &error);
1244 if (!extra32) {
1245 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1246 pcre32_free(re32);
1247 re32 = NULL;
1248 }
1249 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1250 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1251 pcre32_free_study(extra32);
1252 pcre32_free(re32);
1253 re32 = NULL;
1254 }
1255 extra32->flags |= PCRE_EXTRA_MARK;
1256 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1257 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1258 #endif
1259
1260 counter++;
1261 if ((counter & 0x3) != 0) {
1262 #ifdef SUPPORT_PCRE8
1263 setstack8(NULL);
1264 #endif
1265 #ifdef SUPPORT_PCRE16
1266 setstack16(NULL);
1267 #endif
1268 #ifdef SUPPORT_PCRE32
1269 setstack32(NULL);
1270 #endif
1271 }
1272
1273 #ifdef SUPPORT_PCRE8
1274 return_value8[0] = -1000;
1275 return_value8[1] = -1000;
1276 for (i = 0; i < 32; ++i)
1277 ovector8_1[i] = -2;
1278 for (i = 0; i < 32; ++i)
1279 ovector8_2[i] = -2;
1280 if (re8) {
1281 mark8_1 = NULL;
1282 mark8_2 = NULL;
1283 extra8->mark = &mark8_1;
1284
1285 if ((counter & 0x1) != 0) {
1286 setstack8(extra8);
1287 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1288 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1289 } else
1290 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1291 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
1292 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1293 dummy_extra8.flags = PCRE_EXTRA_MARK;
1294 if (current->start_offset & F_STUDY) {
1295 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1296 dummy_extra8.study_data = extra8->study_data;
1297 }
1298 dummy_extra8.mark = &mark8_2;
1299 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1300 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1301 }
1302 #endif
1303
1304 #ifdef SUPPORT_PCRE16
1305 return_value16[0] = -1000;
1306 return_value16[1] = -1000;
1307 for (i = 0; i < 32; ++i)
1308 ovector16_1[i] = -2;
1309 for (i = 0; i < 32; ++i)
1310 ovector16_2[i] = -2;
1311 if (re16) {
1312 mark16_1 = NULL;
1313 mark16_2 = NULL;
1314 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1315 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1316 else
1317 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1318 extra16->mark = &mark16_1;
1319 if ((counter & 0x1) != 0) {
1320 setstack16(extra16);
1321 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1322 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1323 } else
1324 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1325 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
1326 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1327 dummy_extra16.flags = PCRE_EXTRA_MARK;
1328 if (current->start_offset & F_STUDY) {
1329 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1330 dummy_extra16.study_data = extra16->study_data;
1331 }
1332 dummy_extra16.mark = &mark16_2;
1333 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1334 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1335 }
1336 #endif
1337
1338 #ifdef SUPPORT_PCRE32
1339 return_value32[0] = -1000;
1340 return_value32[1] = -1000;
1341 for (i = 0; i < 32; ++i)
1342 ovector32_1[i] = -2;
1343 for (i = 0; i < 32; ++i)
1344 ovector32_2[i] = -2;
1345 if (re32) {
1346 mark32_1 = NULL;
1347 mark32_2 = NULL;
1348 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1349 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1350 else
1351 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1352 extra32->mark = &mark32_1;
1353 if ((counter & 0x1) != 0) {
1354 setstack32(extra32);
1355 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1356 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1357 } else
1358 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1359 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
1360 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1361 dummy_extra32.flags = PCRE_EXTRA_MARK;
1362 if (current->start_offset & F_STUDY) {
1363 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1364 dummy_extra32.study_data = extra32->study_data;
1365 }
1366 dummy_extra32.mark = &mark32_2;
1367 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1368 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1369 }
1370 #endif
1371
1372 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1373 return_value8[0], return_value16[0],
1374 ovector8_1[0], ovector8_1[1],
1375 ovector16_1[0], ovector16_1[1],
1376 ovector32_1[0], ovector32_1[1],
1377 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1378
1379 /* If F_DIFF is set, just run the test, but do not compare the results.
1380 Segfaults can still be captured. */
1381
1382 is_successful = 1;
1383 if (!(current->start_offset & F_DIFF)) {
1384 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1385 if (!(current->start_offset & F_FORCECONV)) {
1386 int return_value;
1387
1388 /* All results must be the same. */
1389 #ifdef SUPPORT_PCRE8
1390 if ((return_value = return_value8[0]) != return_value8[1]) {
1391 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1392 return_value8[0], return_value8[1], total, current->pattern, current->input);
1393 is_successful = 0;
1394 } else
1395 #endif
1396 #ifdef SUPPORT_PCRE16
1397 if ((return_value = return_value16[0]) != return_value16[1]) {
1398 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1399 return_value16[0], return_value16[1], total, current->pattern, current->input);
1400 is_successful = 0;
1401 } else
1402 #endif
1403 #ifdef SUPPORT_PCRE32
1404 if ((return_value = return_value32[0]) != return_value32[1]) {
1405 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1406 return_value32[0], return_value32[1], total, current->pattern, current->input);
1407 is_successful = 0;
1408 } else
1409 #endif
1410 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1411 if (return_value8[0] != return_value16[0]) {
1412 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1413 return_value8[0], return_value16[0],
1414 total, current->pattern, current->input);
1415 is_successful = 0;
1416 } else
1417 #endif
1418 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1419 if (return_value8[0] != return_value32[0]) {
1420 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1421 return_value8[0], return_value32[0],
1422 total, current->pattern, current->input);
1423 is_successful = 0;
1424 } else
1425 #endif
1426 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1427 if (return_value16[0] != return_value32[0]) {
1428 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1429 return_value16[0], return_value32[0],
1430 total, current->pattern, current->input);
1431 is_successful = 0;
1432 } else
1433 #endif
1434 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1435 if (return_value == PCRE_ERROR_PARTIAL) {
1436 return_value = 2;
1437 } else {
1438 return_value *= 2;
1439 }
1440 #ifdef SUPPORT_PCRE8
1441 return_value8[0] = return_value;
1442 #endif
1443 #ifdef SUPPORT_PCRE16
1444 return_value16[0] = return_value;
1445 #endif
1446 #ifdef SUPPORT_PCRE32
1447 return_value32[0] = return_value;
1448 #endif
1449 /* Transform back the results. */
1450 if (current->flags & PCRE_UTF8) {
1451 #ifdef SUPPORT_PCRE16
1452 for (i = 0; i < return_value; ++i) {
1453 if (ovector16_1[i] >= 0)
1454 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1455 if (ovector16_2[i] >= 0)
1456 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1457 }
1458 #endif
1459 #ifdef SUPPORT_PCRE32
1460 for (i = 0; i < return_value; ++i) {
1461 if (ovector32_1[i] >= 0)
1462 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1463 if (ovector32_2[i] >= 0)
1464 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1465 }
1466 #endif
1467 }
1468
1469 for (i = 0; i < return_value; ++i) {
1470 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1471 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1472 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1473 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1474 total, current->pattern, current->input);
1475 is_successful = 0;
1476 }
1477 #endif
1478 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1479 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1480 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1481 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1482 total, current->pattern, current->input);
1483 is_successful = 0;
1484 }
1485 #endif
1486 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1487 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1488 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1489 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1490 total, current->pattern, current->input);
1491 is_successful = 0;
1492 }
1493 #endif
1494 }
1495 }
1496 } else
1497 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1498 {
1499 /* Each enabled width (8, 16 and 32 bit) is checked on its own: only the two results of the same width must be equal. */
1500 #ifdef SUPPORT_PCRE8
1501 if (return_value8[0] != return_value8[1]) {
1502 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1503 return_value8[0], return_value8[1], total, current->pattern, current->input);
1504 is_successful = 0;
1505 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1506 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1507 return_value8[0] = 2;
1508 else
1509 return_value8[0] *= 2;
1510
1511 for (i = 0; i < return_value8[0]; ++i)
1512 if (ovector8_1[i] != ovector8_2[i]) {
1513 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1514 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1515 is_successful = 0;
1516 }
1517 }
1518 #endif
1519
1520 #ifdef SUPPORT_PCRE16
1521 if (return_value16[0] != return_value16[1]) {
1522 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1523 return_value16[0], return_value16[1], total, current->pattern, current->input);
1524 is_successful = 0;
1525 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1526 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1527 return_value16[0] = 2;
1528 else
1529 return_value16[0] *= 2;
1530
1531 for (i = 0; i < return_value16[0]; ++i)
1532 if (ovector16_1[i] != ovector16_2[i]) {
1533 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1534 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1535 is_successful = 0;
1536 }
1537 }
1538 #endif
1539
1540 #ifdef SUPPORT_PCRE32
1541 if (return_value32[0] != return_value32[1]) {
1542 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1543 return_value32[0], return_value32[1], total, current->pattern, current->input);
1544 is_successful = 0;
1545 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1546 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1547 return_value32[0] = 2;
1548 else
1549 return_value32[0] *= 2;
1550
1551 for (i = 0; i < return_value32[0]; ++i)
1552 if (ovector32_1[i] != ovector32_2[i]) {
1553 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1554 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1555 is_successful = 0;
1556 }
1557 }
1558 #endif
1559 }
1560 }
1561
1562 if (is_successful) {
1563 #ifdef SUPPORT_PCRE8
1564 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1565 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1566 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1567 total, current->pattern, current->input);
1568 is_successful = 0;
1569 }
1570
1571 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1572 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1573 total, current->pattern, current->input);
1574 is_successful = 0;
1575 }
1576 }
1577 #endif
1578 #ifdef SUPPORT_PCRE16
1579 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1580 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1581 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1582 total, current->pattern, current->input);
1583 is_successful = 0;
1584 }
1585
1586 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1587 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1588 total, current->pattern, current->input);
1589 is_successful = 0;
1590 }
1591 }
1592 #endif
1593 #ifdef SUPPORT_PCRE32
1594 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1595 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1596 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1597 total, current->pattern, current->input);
1598 is_successful = 0;
1599 }
1600
1601 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1602 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1603 total, current->pattern, current->input);
1604 is_successful = 0;
1605 }
1606 }
1607 #endif
1608 }
1609
1610 if (is_successful) {
1611 #ifdef SUPPORT_PCRE8
1612 if (mark8_1 != mark8_2) {
1613 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1614 total, current->pattern, current->input);
1615 is_successful = 0;
1616 }
1617 #endif
1618 #ifdef SUPPORT_PCRE16
1619 if (mark16_1 != mark16_2) {
1620 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1621 total, current->pattern, current->input);
1622 is_successful = 0;
1623 }
1624 #endif
1625 #ifdef SUPPORT_PCRE32
1626 if (mark32_1 != mark32_2) {
1627 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1628 total, current->pattern, current->input);
1629 is_successful = 0;
1630 }
1631 #endif
1632 }
1633
1634 #ifdef SUPPORT_PCRE8
1635 if (re8) {
1636 pcre_free_study(extra8);
1637 pcre_free(re8);
1638 }
1639 #endif
1640 #ifdef SUPPORT_PCRE16
1641 if (re16) {
1642 pcre16_free_study(extra16);
1643 pcre16_free(re16);
1644 }
1645 #endif
1646 #ifdef SUPPORT_PCRE32
1647 if (re32) {
1648 pcre32_free_study(extra32);
1649 pcre32_free(re32);
1650 }
1651 #endif
1652
1653 if (is_successful) {
1654 successful++;
1655 successful_row++;
1656 printf(".");
1657 if (successful_row >= 60) {
1658 successful_row = 0;
1659 printf("\n");
1660 }
1661 } else
1662 successful_row = 0;
1663
1664 fflush(stdout);
1665 current++;
1666 }
1667 tables(1);
1668 #ifdef SUPPORT_PCRE8
1669 setstack8(NULL);
1670 #endif
1671 #ifdef SUPPORT_PCRE16
1672 setstack16(NULL);
1673 #endif
1674 #ifdef SUPPORT_PCRE32
1675 setstack32(NULL);
1676 #endif
1677
1678 if (total == successful) {
1679 printf("\nAll JIT regression tests are successfully passed.\n");
1680 return 0;
1681 } else {
1682 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1683 return 1;
1684 }
1685 }
1686
1687 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5