/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1308 - (show annotations)
Tue Apr 2 06:58:55 2013 UTC (6 years, 5 months ago) by zherczeg
File MIME type: text/plain
File size: 68844 byte(s)
Fix for recursions to preserve repeat counters.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
/* Bit 31 marker. Not referenced in the visible part of this file;
   NOTE(review): confirm where (or whether) it is still consumed. */
#define PCRE_BUG 0x80000000
55
56 /*
57 Letter characters:
58 \xe6\x92\xad = 0x64ad = 25773 (kanji)
59 Non-letter characters:
60 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
61 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64 Newlines:
65 \xc2\x85 = 0x85 = 133 (Next Line = NEL)
66 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67 Othercase pairs:
68 \xc3\xa9 = 0xe9 = 233 (e')
69 \xc3\x89 = 0xc9 = 201 (E')
70 \xc3\xa1 = 0xe1 = 225 (a')
71 \xc3\x81 = 0xc1 = 193 (A')
72 \xc8\xba = 0x23a = 570
73 \xe2\xb1\xa5 = 0x2c65 = 11365
74 \xe1\xbd\xb8 = 0x1f78 = 8056
75 \xe1\xbf\xb8 = 0x1ff8 = 8184
76 \xf0\x90\x90\x80 = 0x10400 = 66560
77 \xf0\x90\x90\xa8 = 0x10428 = 66600
78 Mark property:
79 \xcc\x8d = 0x30d = 781
80 Special:
81 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86 */
87
/* Runs every regression test case; defined later in this file. */
static int regression_tests(void);

/*
 * Entry point of the JIT regression test program.
 *
 * Asks the library for its PCRE_CONFIG_JIT setting through the config
 * function of the first enabled character width (8 bit, else 16 bit,
 * else 32 bit).
 *
 * Returns 1 after printing a message when JIT is not reported as enabled;
 * otherwise returns whatever regression_tests() returns.
 */
int main(void)
{
	int jit = 0;

#if defined SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE16
	pcre16_config(PCRE_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE32
	pcre32_config(PCRE_CONFIG_JIT, &jit);
#endif
	/* jit stays 0 unless the config call above set it. */
	if (!jit) {
		printf("JIT must be enabled to run pcre_jit_test\n");
		return 1;
	}
	return regression_tests();
}
106
107 /* --------------------------------------------------------------------------------------- */
108
109 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
110 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111 #endif
112
/*
 * Compile-option presets used in the first column of the test table.
 * The letters of each name spell out the options it combines:
 *   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
 *   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
 */
#define MUA	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define MUAP	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define CMUAP	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)

/*
 * Per-test flag bits. The test table ORs these into the second column
 * (start_offset), e.g. "1 | F_NOMATCH"; the low 16 bits covered by
 * OFFSET_MASK are left for the numeric offset itself.
 *
 * In the visible table F_NOMATCH marks cases such as "^ab" against "aab",
 * and F_PROPERTY marks patterns using \p, \P or \X. The exact handling of
 * each flag lives in the test driver, which is outside this section --
 * NOTE(review): confirm the semantics of F_DIFF, F_FORCECONV and F_STUDY
 * there.
 */
#define OFFSET_MASK	0x00ffff
#define F_NO8		0x010000
#define F_NO16		0x020000
/* NOTE(review): F_NO32 shares its bit with F_NO16; value kept as-is,
   confirm this is intentional before assigning it a bit of its own. */
#define F_NO32		0x020000
#define F_NOMATCH	0x040000
#define F_DIFF		0x080000
#define F_FORCECONV	0x100000
#define F_PROPERTY	0x200000
#define F_STUDY		0x400000
130
/*
 * One row of the regression test table.
 *
 * Rows are written positionally, e.g.
 *   { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" }
 */
struct regression_test_case {
	int flags;		/* PCRE_* option bits (often one of the MUA-style presets) */
	int start_offset;	/* numeric offset in the low bits, OR-ed with F_* flag bits */
	const char *pattern;	/* regular expression text */
	const char *input;	/* text paired with the pattern (presumably the match subject) */
};
137
138 static struct regression_test_case regression_test_cases[] = {
139 /* Constant strings. */
140 { MUA, 0, "AbC", "AbAbC" },
141 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
142 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
143 { MA, 0, "[^a]", "aAbB" },
144 { CMA, 0, "[^m]", "mMnN" },
145 { MA, 0, "a[^b][^#]", "abacd" },
146 { CMA, 0, "A[^B][^E]", "abacd" },
147 { CMUA, 0, "[^x][^#]", "XxBll" },
148 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
149 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
150 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
151 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
152 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
153 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
154 { MUA, 0, "[axd]", "sAXd" },
155 { CMUA, 0, "[axd]", "sAXd" },
156 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
157 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
158 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
159 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
161 { MUA, 0, "[^a]", "\xc2\x80[]" },
162 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
163 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
164 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
165 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
166 { PCRE_CASELESS, 0, "a1", "Aa1" },
167 { MA, 0, "\\Ca", "cda" },
168 { CMA, 0, "\\Ca", "CDA" },
169 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
170 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
171 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
174 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
175
176 /* Assertions. */
177 { MUA, 0, "\\b[^A]", "A_B#" },
178 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
179 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
180 { MAP, 0, "\\B", "_\xa1" },
181 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
182 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
183 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
184 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
185 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
186 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
187 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
188 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
189 { MA, 1 | F_NOMATCH, "^", "\n" },
190 { 0, 0, "^ab", "ab" },
191 { 0, 0 | F_NOMATCH, "^ab", "aab" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
197 { 0, 0, "ab$", "ab" },
198 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
199 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
200 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
201 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
202 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
203 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
204 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
205 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
206 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
207 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
208 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
209 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
210 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
211 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
212 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
213 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
214 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
215 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
218 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
219 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
220 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
225 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
226 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
227 { MA, 0, "\\Aa", "aaa" },
228 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
229 { MA, 1, "\\Ga", "aaa" },
230 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
231 { MA, 0, "a\\z", "aaa" },
232 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
233
234 /* Brackets. */
235 { MUA, 0, "(ab|bb|cd)", "bacde" },
236 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
237 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
238 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
239 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
240 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
241
242 /* Greedy and non-greedy ? operators. */
243 { MUA, 0, "(?:a)?a", "laab" },
244 { CMUA, 0, "(A)?A", "llaab" },
245 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
246 { MUA, 0, "(a)?a", "manm" },
247 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
248 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
249 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
250
251 /* Greedy and non-greedy + operators */
252 { MUA, 0, "(aa)+aa", "aaaaaaa" },
253 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
254 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
255 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
256 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
258 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
259
260 /* Greedy and non-greedy * operators */
261 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
262 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
263 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
264 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
265 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
266 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
267 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
268 { MA, 0, "((?:a|)*){0}a", "a" },
269
270 /* Combining ? + * operators */
271 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
272 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
274 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
275 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
276
277 /* Single character iterators. */
278 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
279 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
280 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
281 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
282 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
283 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
284 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
285 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
286 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
287 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
288 { MUA, 0, "(a?+[^b])+", "babaacacb" },
289 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
290 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
291 { CMUA, 0, "[c-f]+k", "DemmFke" },
292 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
293 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
294 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
295 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
296 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
297 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
298 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
299 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
300 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
301 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
302 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
303 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
304 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
305 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
306 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
307 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
309 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
310
311 /* Bracket repeats with limit. */
312 { MUA, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
313 { MUA, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
314 { MUA, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
315 { MUA, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
316 { MUA, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
317 { MUA, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
318 { MUA, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
319 { MUA, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
320 { MUA, 0, "(ab){4,6}?M", "abababababababM" },
321
322 /* Basic character sets. */
323 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
324 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
325 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
326 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
327 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
328 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
329
330 /* Unicode properties. */
331 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
332 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
333 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
334 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
335 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
336 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
337 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
338 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
339 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
340 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
341 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
342 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
343 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
344 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
345 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
346 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
347 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
348 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
349 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
350 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
351
352 /* Possible empty brackets. */
353 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
354 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
355 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
356 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
357 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
358 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
359 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
360 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
361 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
362 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
363
364 /* Start offset. */
365 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
366 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
367 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
368 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
369
370 /* Newline. */
371 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
372 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
373 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
374
375 /* Any character except newline or any newline. */
376 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
377 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
378 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
379 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
380 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
381 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
382 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
383 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
384 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
385 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
386 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
387 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
388 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
389 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
390 { MUA, 0, "\\R+", "ab\r\n\r" },
391 { MUA, 0, "\\R*", "ab\r\n\r" },
392 { MUA, 0, "\\R*", "\r\n\r" },
393 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
394 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
395 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
396 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
397 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
398 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
399 { MUA, 0, "\\R*\\R\\R", "\n\r" },
400 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
401 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
402
403 /* Atomic groups (no fallback from "next" direction). */
404 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
405 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
406 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
407 "bababcdedefgheijijklmlmnop" },
408 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
409 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
410 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
411 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
412 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
413 { MUA, 0, "(?>x|)*$", "aaa" },
414 { MUA, 0, "(?>(x)|)*$", "aaa" },
415 { MUA, 0, "(?>x|())*$", "aaa" },
416 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
417 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
418 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
419 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
420 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
421 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
422 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
423 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
424 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
425 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
426 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
427 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
428 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
429 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
430 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
431 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
432 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
433 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
434 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
435 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
436 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
437 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
438 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
439 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
440 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
441 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
442
443 /* Possessive quantifiers. */
444 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
445 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
446 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
447 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
448 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
449 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
450 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
451 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
452 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
453 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
454 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
455 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
456 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
457 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
458 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
459 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
460 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
461 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
462 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
463 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
464 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
465 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
466 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
467 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
468 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
469 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
470 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
471 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
472 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
473 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
474 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
475 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
476 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
477 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
478 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
479
480 /* Back references. */
481 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
482 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
483 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
484 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
485 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
486 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
487 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
488 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
489 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
490 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
491 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
492 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
493 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
494 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
495 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
496 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
497 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
498 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
499 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
500 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
501 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
502 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
503 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
504
505 /* Assertions. */
506 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
507 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
508 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
509 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
510 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
511 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
512 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
513 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
514 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
515 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
516 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
517 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
518 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
519 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
520 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
521 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
522 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
523 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
524 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
525 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
526 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
527 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
528 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
529 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
530
531 /* Not empty, ACCEPT, FAIL */
532 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
533 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
534 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
535 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
536 { MUA, 0, "a(*ACCEPT)b", "ab" },
537 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
538 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
539 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
540 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
541 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
542 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
543 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
544 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
545 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
546 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
547 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
548 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
549 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
550 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
551
552 /* Conditional blocks. */
553 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
554 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
555 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
556 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
557 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
558 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
559 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
560 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
561 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
562 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
563 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
564 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
565 { MUA, 0, "(?(?=a)ab)", "a" },
566 { MUA, 0, "(?(?<!b)c)", "b" },
567 { MUA, 0, "(?(DEFINE)a(b))", "a" },
568 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
569 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
570 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
571 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
572 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
573 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
574 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
575 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
576 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
577 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
578 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
579 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
580 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
581 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
582 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
583 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
584 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
585 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
586 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
587 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
588
589 /* Set start of match. */
590 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
591 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
592 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
593 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
594 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
595
596 /* First line. */
597 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
598 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
599 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
600 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
601 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
602 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
603 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
604 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
605 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
606 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
607 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
608 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
609 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
610 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
611 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
612 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
613 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
614 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
615 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
616
617 /* Recurse. */
618 { MUA, 0, "(a)(?1)", "aa" },
619 { MUA, 0, "((a))(?1)", "aa" },
620 { MUA, 0, "(b|a)(?1)", "aa" },
621 { MUA, 0, "(b|(a))(?1)", "aa" },
622 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
623 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
624 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
625 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
626 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
627 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
628 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
629 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
630 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
631 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
632 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
633 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
634 { MUA, 0, "b|<(?R)*>", "<<b>" },
635 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
636 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
637 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
638 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
639 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
640 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
641 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
642 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
643 { MUA, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
644 { MUA, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
645 { MUA, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
646 { MUA, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
647
648 /* 16 bit specific tests. */
649 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
650 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
651 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
652 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
653 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
654 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
655 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
656 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
657 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
658 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
659 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
660 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
661 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
662 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
663 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
664 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
665 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
666 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
667 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
668 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
669 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
670 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
671 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
672 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
673 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
674 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
675 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
676 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
677 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
678 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
679 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
680
681 /* Partial matching. */
682 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
683 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
684 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
685 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
686 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
687 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
688 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
689 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
690
691 /* (*MARK) verb. */
692 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
693 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
694 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
695 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
696 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
697 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
698 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
699 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
700 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
701 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
702 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
703 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
704 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
705 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
706 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
707
708 /* (*COMMIT) verb. */
709 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
710 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
711 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
712 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
713 { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
714 { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
715
716 /* (*PRUNE) verb. */
717 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
718 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
719 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
720 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
721 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
722 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
723 { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
724 { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
725 { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
726 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
727 { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
728 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
729 { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
730 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
731 { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
732 { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
733 { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
734 { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
735 { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
736 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
737 { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
738 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
739 { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
740 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
741 { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
742 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
743 { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
744 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
745 { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
746 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
747
748 /* (*SKIP) verb. */
749 { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
750
751 /* (*THEN) verb. */
752 { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
753 { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
754 { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
755 { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
756 { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
757 { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
758 { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
759 { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
760 { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
761
762 /* Deep recursion. */
763 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
764 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
765 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
766
767 /* Deep recursion: Stack limit reached. */
768 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
769 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
770 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
771 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
772 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
773
774 { 0, 0, NULL, NULL }
775 };
776
/* Returns a private heap copy of PCRE's default character tables.

Compiling the test patterns against a malloc'ed copy (instead of the tables
built into the library) allows valgrind to report invalid reads and writes
on them.

mode == 0: return the cached copy, creating it on first use. NULL is
           returned if the default tables cannot be obtained or the
           allocation fails.
mode != 0: release the cached copy and return NULL. */
static const unsigned char *tables(int mode)
{
	/* Unfortunately this size cannot be queried through pcre_fullinfo.
	Since this is a test program, hard-coding it is acceptable at the moment. */
	enum { TABLES_SIZE = 1088 };
	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables = NULL;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode) {
		/* free(NULL) is a no-op, so no guard is required. */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy)
		return tables_copy;

	/* Compile an empty pattern with the built-in tables (last argument is
	NULL) and ask the library where those tables live. */
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre32_free(regex);
	}
#endif
	/* Shouldn't ever happen. */
	if (!default_tables)
		return NULL;

	tables_copy = (unsigned char *)malloc(TABLES_SIZE);
	if (!tables_copy)
		return NULL;

	memcpy(tables_copy, default_tables, TABLES_SIZE);
	return tables_copy;
}
839
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library. setstack8() registers the stack
itself as the callback argument, so the argument is simply handed back. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *jit_stack = (pcre_jit_stack *)arg;

	return jit_stack;
}
#endif
846
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library. setstack16() registers the stack
itself as the callback argument, so the argument is simply handed back. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *jit_stack = (pcre16_jit_stack *)arg;

	return jit_stack;
}
#endif
853
#ifdef SUPPORT_PCRE32
/* JIT stack callback for the 32 bit library. setstack32() registers the stack
itself as the callback argument, so the argument is simply handed back. */
static pcre32_jit_stack* callback32(void *arg)
{
	pcre32_jit_stack *jit_stack = (pcre32_jit_stack *)arg;

	return jit_stack;
}
#endif
860
#ifdef SUPPORT_PCRE8
/* JIT stack shared by the 8 bit tests; NULL while no stack is allocated. */
static pcre_jit_stack *stack8;

/* Returns the shared 8 bit JIT stack, allocating it on first use. The result
of pcre_jit_stack_alloc() is returned unchecked. */
static pcre_jit_stack *getstack8(void)
{
	if (stack8)
		return stack8;

	stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
	return stack8;
}

/* extra != NULL: assign the shared JIT stack to the given study data.
extra == NULL: release the shared JIT stack, so that the next request
               allocates a fresh one. */
static void setstack8(pcre_extra *extra)
{
	if (extra) {
		pcre_assign_jit_stack(extra, callback8, getstack8());
		return;
	}

	if (stack8)
		pcre_jit_stack_free(stack8);
	stack8 = NULL;
}
#endif /* SUPPORT_PCRE8 */
883
#ifdef SUPPORT_PCRE16
/* JIT stack shared by the 16 bit tests; NULL while no stack is allocated. */
static pcre16_jit_stack *stack16;

/* Returns the shared 16 bit JIT stack, allocating it on first use. The result
of pcre16_jit_stack_alloc() is returned unchecked. */
static pcre16_jit_stack *getstack16(void)
{
	if (stack16)
		return stack16;

	stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
	return stack16;
}

/* extra != NULL: assign the shared JIT stack to the given study data.
extra == NULL: release the shared JIT stack, so that the next request
               allocates a fresh one. */
static void setstack16(pcre16_extra *extra)
{
	if (extra) {
		pcre16_assign_jit_stack(extra, callback16, getstack16());
		return;
	}

	if (stack16)
		pcre16_jit_stack_free(stack16);
	stack16 = NULL;
}
#endif /* SUPPORT_PCRE16 */
906
#ifdef SUPPORT_PCRE32
/* JIT stack shared by the 32 bit tests; NULL while no stack is allocated. */
static pcre32_jit_stack *stack32;

/* Returns the shared 32 bit JIT stack, allocating it on first use. The result
of pcre32_jit_stack_alloc() is returned unchecked. */
static pcre32_jit_stack *getstack32(void)
{
	if (stack32)
		return stack32;

	stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
	return stack32;
}

/* extra != NULL: assign the shared JIT stack to the given study data.
extra == NULL: release the shared JIT stack, so that the next request
               allocates a fresh one. */
static void setstack32(pcre32_extra *extra)
{
	if (extra) {
		pcre32_assign_jit_stack(extra, callback32, getstack32());
		return;
	}

	if (stack32)
		pcre32_jit_stack_free(stack32);
	stack32 = NULL;
}
#endif /* SUPPORT_PCRE32 */
929
930 #ifdef SUPPORT_PCRE16
931
932 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
933 {
934 unsigned char *iptr = (unsigned char*)input;
935 PCRE_UCHAR16 *optr = output;
936 unsigned int c;
937
938 if (max_length == 0)
939 return 0;
940
941 while (*iptr && max_length > 1) {
942 c = 0;
943 if (offsetmap)
944 *offsetmap++ = (int)(iptr - (unsigned char*)input);
945
946 if (!(*iptr & 0x80))
947 c = *iptr++;
948 else if (!(*iptr & 0x20)) {
949 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
950 iptr += 2;
951 } else if (!(*iptr & 0x10)) {
952 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
953 iptr += 3;
954 } else if (!(*iptr & 0x08)) {
955 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
956 iptr += 4;
957 }
958
959 if (c < 65536) {
960 *optr++ = c;
961 max_length--;
962 } else if (max_length <= 2) {
963 *optr = '\0';
964 return (int)(optr - output);
965 } else {
966 c -= 0x10000;
967 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
968 *optr++ = 0xdc00 | (c & 0x3ff);
969 max_length -= 2;
970 if (offsetmap)
971 offsetmap++;
972 }
973 }
974 if (offsetmap)
975 *offsetmap = (int)(iptr - (unsigned char*)input);
976 *optr = '\0';
977 return (int)(optr - output);
978 }
979
980 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
981 {
982 unsigned char *iptr = (unsigned char*)input;
983 PCRE_UCHAR16 *optr = output;
984
985 if (max_length == 0)
986 return 0;
987
988 while (*iptr && max_length > 1) {
989 *optr++ = *iptr++;
990 max_length--;
991 }
992 *optr = '\0';
993 return (int)(optr - output);
994 }
995
/* Capacity, in 16 bit units, of the conversion buffer below. */
#define REGTEST_MAX_LENGTH16 4096
/* Scratch buffer that regression_tests() fills with the 16 bit form of the
current pattern and, later, of the current input. */
static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* Offset map filled by convert_utf8_to_utf16() for the current input; used to
translate 16 bit match offsets back to byte offsets of the UTF-8 input. */
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
999
1000 #endif /* SUPPORT_PCRE16 */
1001
1002 #ifdef SUPPORT_PCRE32
1003
1004 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
1005 {
1006 unsigned char *iptr = (unsigned char*)input;
1007 PCRE_UCHAR32 *optr = output;
1008 unsigned int c;
1009
1010 if (max_length == 0)
1011 return 0;
1012
1013 while (*iptr && max_length > 1) {
1014 c = 0;
1015 if (offsetmap)
1016 *offsetmap++ = (int)(iptr - (unsigned char*)input);
1017
1018 if (!(*iptr & 0x80))
1019 c = *iptr++;
1020 else if (!(*iptr & 0x20)) {
1021 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1022 iptr += 2;
1023 } else if (!(*iptr & 0x10)) {
1024 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1025 iptr += 3;
1026 } else if (!(*iptr & 0x08)) {
1027 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1028 iptr += 4;
1029 }
1030
1031 *optr++ = c;
1032 max_length--;
1033 }
1034 if (offsetmap)
1035 *offsetmap = (int)(iptr - (unsigned char*)input);
1036 *optr = 0;
1037 return (int)(optr - output);
1038 }
1039
1040 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
1041 {
1042 unsigned char *iptr = (unsigned char*)input;
1043 PCRE_UCHAR32 *optr = output;
1044
1045 if (max_length == 0)
1046 return 0;
1047
1048 while (*iptr && max_length > 1) {
1049 *optr++ = *iptr++;
1050 max_length--;
1051 }
1052 *optr = '\0';
1053 return (int)(optr - output);
1054 }
1055
/* Capacity, in 32 bit units, of the conversion buffer below. */
#define REGTEST_MAX_LENGTH32 4096
/* Scratch buffer that regression_tests() fills with the 32 bit form of the
current pattern and, later, of the current input. */
static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* Offset map filled by convert_utf8_to_utf32() for the current input; used to
translate 32 bit match offsets back to byte offsets of the UTF-8 input. */
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1059
1060 #endif /* SUPPORT_PCRE32 */
1061
/* Tells whether a NUL terminated string consists of 7 bit characters only.

Returns 1 when every byte before the terminator is <= 127 (an empty string
qualifies), 0 as soon as a byte with the top bit set is found. */
static int check_ascii(const char *input)
{
	const unsigned char *ptr;

	/* Bytes are inspected as unsigned so that values above 127 are not
	mistaken for negative chars. */
	for (ptr = (const unsigned char *)input; *ptr; ptr++) {
		if (*ptr > 127)
			return 0;
	}
	return 1;
}
1072
1073 static int regression_tests(void)
1074 {
1075 struct regression_test_case *current = regression_test_cases;
1076 const char *error;
1077 char *cpu_info;
1078 int i, err_offs;
1079 int is_successful, is_ascii_pattern, is_ascii_input;
1080 int total = 0;
1081 int successful = 0;
1082 int successful_row = 0;
1083 int counter = 0;
1084 int study_mode;
1085 int utf = 0, ucp = 0;
1086 int disabled_flags = 0;
1087 #ifdef SUPPORT_PCRE8
1088 pcre *re8;
1089 pcre_extra *extra8;
1090 pcre_extra dummy_extra8;
1091 int ovector8_1[32];
1092 int ovector8_2[32];
1093 int return_value8[2];
1094 unsigned char *mark8_1, *mark8_2;
1095 #endif
1096 #ifdef SUPPORT_PCRE16
1097 pcre16 *re16;
1098 pcre16_extra *extra16;
1099 pcre16_extra dummy_extra16;
1100 int ovector16_1[32];
1101 int ovector16_2[32];
1102 int return_value16[2];
1103 PCRE_UCHAR16 *mark16_1, *mark16_2;
1104 int length16;
1105 #endif
1106 #ifdef SUPPORT_PCRE32
1107 pcre32 *re32;
1108 pcre32_extra *extra32;
1109 pcre32_extra dummy_extra32;
1110 int ovector32_1[32];
1111 int ovector32_2[32];
1112 int return_value32[2];
1113 PCRE_UCHAR32 *mark32_1, *mark32_2;
1114 int length32;
1115 #endif
1116
1117 /* This test compares the behaviour of interpreter and JIT. Although disabling
1118 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1119 still considered successful from pcre_jit_test point of view. */
1120
1121 #if defined SUPPORT_PCRE8
1122 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1123 #elif defined SUPPORT_PCRE16
1124 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1125 #elif defined SUPPORT_PCRE32
1126 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1127 #endif
1128
1129 printf("Running JIT regression tests\n");
1130 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1131
1132 #if defined SUPPORT_PCRE8
1133 pcre_config(PCRE_CONFIG_UTF8, &utf);
1134 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1135 #elif defined SUPPORT_PCRE16
1136 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1137 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1138 #elif defined SUPPORT_PCRE16
1139 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1140 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1141 #endif
1142
1143 if (!utf)
1144 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1145 if (!ucp)
1146 disabled_flags |= PCRE_UCP;
1147 #ifdef SUPPORT_PCRE8
1148 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1149 #endif
1150 #ifdef SUPPORT_PCRE16
1151 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1152 #endif
1153 #ifdef SUPPORT_PCRE32
1154 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1155 #endif
1156
1157 while (current->pattern) {
1158 /* printf("\nPattern: %s :\n", current->pattern); */
1159 total++;
1160 if (current->start_offset & F_PROPERTY) {
1161 is_ascii_pattern = 0;
1162 is_ascii_input = 0;
1163 } else {
1164 is_ascii_pattern = check_ascii(current->pattern);
1165 is_ascii_input = check_ascii(current->input);
1166 }
1167
1168 if (current->flags & PCRE_PARTIAL_SOFT)
1169 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1170 else if (current->flags & PCRE_PARTIAL_HARD)
1171 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1172 else
1173 study_mode = PCRE_STUDY_JIT_COMPILE;
1174 error = NULL;
1175 #ifdef SUPPORT_PCRE8
1176 re8 = NULL;
1177 if (!(current->start_offset & F_NO8))
1178 re8 = pcre_compile(current->pattern,
1179 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1180 &error, &err_offs, tables(0));
1181
1182 extra8 = NULL;
1183 if (re8) {
1184 error = NULL;
1185 extra8 = pcre_study(re8, study_mode, &error);
1186 if (!extra8) {
1187 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1188 pcre_free(re8);
1189 re8 = NULL;
1190 }
1191 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1192 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1193 pcre_free_study(extra8);
1194 pcre_free(re8);
1195 re8 = NULL;
1196 }
1197 extra8->flags |= PCRE_EXTRA_MARK;
1198 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1199 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1200 #endif
1201 #ifdef SUPPORT_PCRE16
1202 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1203 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1204 else
1205 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1206
1207 re16 = NULL;
1208 if (!(current->start_offset & F_NO16))
1209 re16 = pcre16_compile(regtest_buf16,
1210 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1211 &error, &err_offs, tables(0));
1212
1213 extra16 = NULL;
1214 if (re16) {
1215 error = NULL;
1216 extra16 = pcre16_study(re16, study_mode, &error);
1217 if (!extra16) {
1218 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1219 pcre16_free(re16);
1220 re16 = NULL;
1221 }
1222 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1223 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1224 pcre16_free_study(extra16);
1225 pcre16_free(re16);
1226 re16 = NULL;
1227 }
1228 extra16->flags |= PCRE_EXTRA_MARK;
1229 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1230 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1231 #endif
1232 #ifdef SUPPORT_PCRE32
1233 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1234 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1235 else
1236 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1237
1238 re32 = NULL;
1239 if (!(current->start_offset & F_NO32))
1240 re32 = pcre32_compile(regtest_buf32,
1241 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1242 &error, &err_offs, tables(0));
1243
1244 extra32 = NULL;
1245 if (re32) {
1246 error = NULL;
1247 extra32 = pcre32_study(re32, study_mode, &error);
1248 if (!extra32) {
1249 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1250 pcre32_free(re32);
1251 re32 = NULL;
1252 }
1253 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1254 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1255 pcre32_free_study(extra32);
1256 pcre32_free(re32);
1257 re32 = NULL;
1258 }
1259 extra32->flags |= PCRE_EXTRA_MARK;
1260 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1261 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1262 #endif
1263
1264 counter++;
1265 if ((counter & 0x3) != 0) {
1266 #ifdef SUPPORT_PCRE8
1267 setstack8(NULL);
1268 #endif
1269 #ifdef SUPPORT_PCRE16
1270 setstack16(NULL);
1271 #endif
1272 #ifdef SUPPORT_PCRE32
1273 setstack32(NULL);
1274 #endif
1275 }
1276
1277 #ifdef SUPPORT_PCRE8
1278 return_value8[0] = -1000;
1279 return_value8[1] = -1000;
1280 for (i = 0; i < 32; ++i)
1281 ovector8_1[i] = -2;
1282 for (i = 0; i < 32; ++i)
1283 ovector8_2[i] = -2;
1284 if (re8) {
1285 mark8_1 = NULL;
1286 mark8_2 = NULL;
1287 extra8->mark = &mark8_1;
1288
1289 if ((counter & 0x1) != 0) {
1290 setstack8(extra8);
1291 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1292 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1293 } else
1294 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1295 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
1296 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1297 dummy_extra8.flags = PCRE_EXTRA_MARK;
1298 if (current->start_offset & F_STUDY) {
1299 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1300 dummy_extra8.study_data = extra8->study_data;
1301 }
1302 dummy_extra8.mark = &mark8_2;
1303 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1304 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1305 }
1306 #endif
1307
1308 #ifdef SUPPORT_PCRE16
1309 return_value16[0] = -1000;
1310 return_value16[1] = -1000;
1311 for (i = 0; i < 32; ++i)
1312 ovector16_1[i] = -2;
1313 for (i = 0; i < 32; ++i)
1314 ovector16_2[i] = -2;
1315 if (re16) {
1316 mark16_1 = NULL;
1317 mark16_2 = NULL;
1318 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1319 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1320 else
1321 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1322 extra16->mark = &mark16_1;
1323 if ((counter & 0x1) != 0) {
1324 setstack16(extra16);
1325 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1326 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1327 } else
1328 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1329 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
1330 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1331 dummy_extra16.flags = PCRE_EXTRA_MARK;
1332 if (current->start_offset & F_STUDY) {
1333 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1334 dummy_extra16.study_data = extra16->study_data;
1335 }
1336 dummy_extra16.mark = &mark16_2;
1337 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1338 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1339 }
1340 #endif
1341
1342 #ifdef SUPPORT_PCRE32
1343 return_value32[0] = -1000;
1344 return_value32[1] = -1000;
1345 for (i = 0; i < 32; ++i)
1346 ovector32_1[i] = -2;
1347 for (i = 0; i < 32; ++i)
1348 ovector32_2[i] = -2;
1349 if (re32) {
1350 mark32_1 = NULL;
1351 mark32_2 = NULL;
1352 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1353 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1354 else
1355 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1356 extra32->mark = &mark32_1;
1357 if ((counter & 0x1) != 0) {
1358 setstack32(extra32);
1359 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1360 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1361 } else
1362 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1363 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
1364 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1365 dummy_extra32.flags = PCRE_EXTRA_MARK;
1366 if (current->start_offset & F_STUDY) {
1367 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1368 dummy_extra32.study_data = extra32->study_data;
1369 }
1370 dummy_extra32.mark = &mark32_2;
1371 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1372 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1373 }
1374 #endif
1375
1376 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1377 return_value8[0], return_value16[0],
1378 ovector8_1[0], ovector8_1[1],
1379 ovector16_1[0], ovector16_1[1],
1380 ovector32_1[0], ovector32_1[1],
1381 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1382
1383 /* If F_DIFF is set, just run the test, but do not compare the results.
1384 Segfaults can still be captured. */
1385
1386 is_successful = 1;
1387 if (!(current->start_offset & F_DIFF)) {
1388 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1389 if (!(current->start_offset & F_FORCECONV)) {
1390 int return_value;
1391
1392 /* All results must be the same. */
1393 #ifdef SUPPORT_PCRE8
1394 if ((return_value = return_value8[0]) != return_value8[1]) {
1395 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1396 return_value8[0], return_value8[1], total, current->pattern, current->input);
1397 is_successful = 0;
1398 } else
1399 #endif
1400 #ifdef SUPPORT_PCRE16
1401 if ((return_value = return_value16[0]) != return_value16[1]) {
1402 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1403 return_value16[0], return_value16[1], total, current->pattern, current->input);
1404 is_successful = 0;
1405 } else
1406 #endif
1407 #ifdef SUPPORT_PCRE32
1408 if ((return_value = return_value32[0]) != return_value32[1]) {
1409 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1410 return_value32[0], return_value32[1], total, current->pattern, current->input);
1411 is_successful = 0;
1412 } else
1413 #endif
1414 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1415 if (return_value8[0] != return_value16[0]) {
1416 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1417 return_value8[0], return_value16[0],
1418 total, current->pattern, current->input);
1419 is_successful = 0;
1420 } else
1421 #endif
1422 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1423 if (return_value8[0] != return_value32[0]) {
1424 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1425 return_value8[0], return_value32[0],
1426 total, current->pattern, current->input);
1427 is_successful = 0;
1428 } else
1429 #endif
1430 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1431 if (return_value16[0] != return_value32[0]) {
1432 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1433 return_value16[0], return_value32[0],
1434 total, current->pattern, current->input);
1435 is_successful = 0;
1436 } else
1437 #endif
1438 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1439 if (return_value == PCRE_ERROR_PARTIAL) {
1440 return_value = 2;
1441 } else {
1442 return_value *= 2;
1443 }
1444 #ifdef SUPPORT_PCRE8
1445 return_value8[0] = return_value;
1446 #endif
1447 #ifdef SUPPORT_PCRE16
1448 return_value16[0] = return_value;
1449 #endif
1450 #ifdef SUPPORT_PCRE32
1451 return_value32[0] = return_value;
1452 #endif
1453 /* Transform back the results. */
1454 if (current->flags & PCRE_UTF8) {
1455 #ifdef SUPPORT_PCRE16
1456 for (i = 0; i < return_value; ++i) {
1457 if (ovector16_1[i] >= 0)
1458 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1459 if (ovector16_2[i] >= 0)
1460 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1461 }
1462 #endif
1463 #ifdef SUPPORT_PCRE32
1464 for (i = 0; i < return_value; ++i) {
1465 if (ovector32_1[i] >= 0)
1466 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1467 if (ovector32_2[i] >= 0)
1468 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1469 }
1470 #endif
1471 }
1472
1473 for (i = 0; i < return_value; ++i) {
1474 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1475 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1476 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1477 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1478 total, current->pattern, current->input);
1479 is_successful = 0;
1480 }
1481 #endif
1482 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1483 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1484 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1485 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1486 total, current->pattern, current->input);
1487 is_successful = 0;
1488 }
1489 #endif
1490 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32 /* cross-check the 16 bit results against the 32 bit ones */
1491 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1492 printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1493 i, ovector16_1[i], ovector16_2[i], ovector32_1[i], ovector32_2[i],
1494 total, current->pattern, current->input);
1495 is_successful = 0;
1496 }
1497 #endif /* SUPPORT_PCRE16 && SUPPORT_PCRE32 */
1498 }
1499 }
1500 } else
1501 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1502 {
1503 /* Only the JIT and interpreter results of the same bit width must be equal. */
1504 #ifdef SUPPORT_PCRE8
1505 if (return_value8[0] != return_value8[1]) {
1506 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1507 return_value8[0], return_value8[1], total, current->pattern, current->input);
1508 is_successful = 0;
1509 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1510 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1511 return_value8[0] = 2;
1512 else
1513 return_value8[0] *= 2;
1514
1515 for (i = 0; i < return_value8[0]; ++i)
1516 if (ovector8_1[i] != ovector8_2[i]) {
1517 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1518 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1519 is_successful = 0;
1520 }
1521 }
1522 #endif
1523
1524 #ifdef SUPPORT_PCRE16
1525 if (return_value16[0] != return_value16[1]) {
1526 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1527 return_value16[0], return_value16[1], total, current->pattern, current->input);
1528 is_successful = 0;
1529 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1530 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1531 return_value16[0] = 2;
1532 else
1533 return_value16[0] *= 2;
1534
1535 for (i = 0; i < return_value16[0]; ++i)
1536 if (ovector16_1[i] != ovector16_2[i]) {
1537 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1538 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1539 is_successful = 0;
1540 }
1541 }
1542 #endif
1543
1544 #ifdef SUPPORT_PCRE32
1545 if (return_value32[0] != return_value32[1]) {
1546 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1547 return_value32[0], return_value32[1], total, current->pattern, current->input);
1548 is_successful = 0;
1549 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1550 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1551 return_value32[0] = 2;
1552 else
1553 return_value32[0] *= 2;
1554
1555 for (i = 0; i < return_value32[0]; ++i)
1556 if (ovector32_1[i] != ovector32_2[i]) {
1557 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1558 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1559 is_successful = 0;
1560 }
1561 }
1562 #endif
1563 }
1564 }
1565
1566 if (is_successful) {
1567 #ifdef SUPPORT_PCRE8
1568 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1569 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1570 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1571 total, current->pattern, current->input);
1572 is_successful = 0;
1573 }
1574
1575 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1576 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1577 total, current->pattern, current->input);
1578 is_successful = 0;
1579 }
1580 }
1581 #endif
1582 #ifdef SUPPORT_PCRE16
1583 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1584 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1585 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1586 total, current->pattern, current->input);
1587 is_successful = 0;
1588 }
1589
1590 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1591 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1592 total, current->pattern, current->input);
1593 is_successful = 0;
1594 }
1595 }
1596 #endif
1597 #ifdef SUPPORT_PCRE32
1598 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1599 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1600 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1601 total, current->pattern, current->input);
1602 is_successful = 0;
1603 }
1604
1605 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1606 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1607 total, current->pattern, current->input);
1608 is_successful = 0;
1609 }
1610 }
1611 #endif
1612 }
1613
1614 if (is_successful) {
1615 #ifdef SUPPORT_PCRE8
1616 if (mark8_1 != mark8_2) {
1617 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1618 total, current->pattern, current->input);
1619 is_successful = 0;
1620 }
1621 #endif
1622 #ifdef SUPPORT_PCRE16
1623 if (mark16_1 != mark16_2) {
1624 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1625 total, current->pattern, current->input);
1626 is_successful = 0;
1627 }
1628 #endif
1629 #ifdef SUPPORT_PCRE32
1630 if (mark32_1 != mark32_2) {
1631 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1632 total, current->pattern, current->input);
1633 is_successful = 0;
1634 }
1635 #endif
1636 }
1637
1638 #ifdef SUPPORT_PCRE8
1639 if (re8) {
1640 pcre_free_study(extra8);
1641 pcre_free(re8);
1642 }
1643 #endif
1644 #ifdef SUPPORT_PCRE16
1645 if (re16) {
1646 pcre16_free_study(extra16);
1647 pcre16_free(re16);
1648 }
1649 #endif
1650 #ifdef SUPPORT_PCRE32
1651 if (re32) {
1652 pcre32_free_study(extra32);
1653 pcre32_free(re32);
1654 }
1655 #endif
1656
1657 if (is_successful) {
1658 successful++;
1659 successful_row++;
1660 printf(".");
1661 if (successful_row >= 60) {
1662 successful_row = 0;
1663 printf("\n");
1664 }
1665 } else
1666 successful_row = 0;
1667
1668 fflush(stdout);
1669 current++;
1670 }
1671 tables(1);
1672 #ifdef SUPPORT_PCRE8
1673 setstack8(NULL);
1674 #endif
1675 #ifdef SUPPORT_PCRE16
1676 setstack16(NULL);
1677 #endif
1678 #ifdef SUPPORT_PCRE32
1679 setstack32(NULL);
1680 #endif
1681
1682 if (total == successful) {
1683 printf("\nAll JIT regression tests are successfully passed.\n");
1684 return 0;
1685 } else {
1686 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1687 return 1;
1688 }
1689 }
1690
1691 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5