/[pcre]/code/branches/pcre16/pcre_jit_test.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 786 - (show annotations)
Tue Dec 6 11:33:41 2011 UTC (9 years, 4 months ago) by zherczeg
File MIME type: text/plain
File size: 40329 byte(s)
Updating pcre_jit_test. Most of the JIT tests are working now in 16 bit mode.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51 #define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 Newlines:
60 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
61 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62 Othercase pairs:
63 \xc3\xa9 = 0xe9 = 233 (e')
64 \xc3\x89 = 0xc9 = 201 (E')
65 \xc3\xa1 = 0xe1 = 225 (a')
66 \xc3\x81 = 0xc1 = 193 (A')
67 \xc8\xba = 0x23a = 570
68 \xe2\xb1\xa5 = 0x2c65 = 11365
69 \xe1\xbd\xb8 = 0x1f78 = 8056
70 \xe1\xbf\xb8 = 0x1ff8 = 8184
71 \xf0\x90\x90\x80 = 0x10400 = 66560
72 \xf0\x90\x90\xa8 = 0x10428 = 66600
73 Mark property:
74 \xcc\x8d = 0x30d = 781
75 Special:
76 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
77 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
78 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
79 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
80 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
81 */
82
83 static int regression_tests(void);
84
85 int main(void)
86 {
87 int jit = 0;
88 pcre_config(PCRE_CONFIG_JIT, &jit);
89 if (!jit) {
90 printf("JIT must be enabled to run pcre_jit_test\n");
91 return 1;
92 }
93 return regression_tests();
94 }
95
96 /* --------------------------------------------------------------------------------------- */
97
/* Refuse to compile unless at least one of SUPPORT_PCRE8 and
   SUPPORT_PCRE16 is defined. */
#if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
#error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
#endif

/* Shorthand option combinations for the test table. Each letter of a
   name stands for one option bit:
     C = PCRE_CASELESS
     M = PCRE_MULTILINE
     U = PCRE_UTF8
     A = PCRE_NEWLINE_ANYCRLF
     P = PCRE_UCP */
#define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
109
/* One regression test: a pattern, the subject text it is tried against,
   and the options and offset that go with them. */
struct regression_test_case {
	/* Bitwise OR of PCRE_* option bits; some table entries also OR in
	   PCRE_BUG. */
	int flags;
	/* Offset associated with the input (0 in most table entries).
	   NOTE(review): presumably the position at which matching starts;
	   the code consuming this field is not visible here - confirm. */
	int start_offset;
	/* Regular expression source, as a C string. */
	const char *pattern;
	/* Subject text, as a C string. */
	const char *input;
};
116
/* Table of regression test cases, grouped by the regular expression
   feature they exercise. Every entry is { flags, start_offset, pattern,
   input }; the table ends with an entry whose pattern and input are NULL.
   Non-ASCII characters are written as \x escapes of their bytes. */
static struct regression_test_case regression_test_cases[] = {
	/* Constant strings. */
	{ MUA, 0, "AbC", "AbAbC" },
	{ MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
	{ CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
	{ MA, 0, "[^a]", "aAbB" },
	{ CMA, 0, "[^m]", "mMnN" },
	{ MA, 0, "a[^b][^#]", "abacd" },
	{ CMA, 0, "A[^B][^E]", "abacd" },
	{ CMUA, 0, "[^x][^#]", "XxBll" },
	{ MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
	{ CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
	{ MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
	{ MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
	{ MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
	{ MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
	{ MUA, 0, "[axd]", "sAXd" },
	{ CMUA, 0, "[axd]", "sAXd" },
	{ CMUA, 0, "[^axd]", "DxA" },
	{ MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
	{ MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
	{ CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
	{ MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
	{ MUA, 0, "[^a]", "\xc2\x80[]" },
	{ CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
	{ CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
	{ PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
	{ PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
	{ PCRE_CASELESS, 0, "a1", "Aa1" },
	{ MA, 0, "\\Ca", "cda" },
	{ CMA, 0, "\\Ca", "CDA" },
	{ MA, 0, "\\Cx", "cda" },
	{ CMA, 0, "\\Cx", "CDA" },
	{ CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
	{ CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
	{ CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
	{ CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },

	/* Assertions. */
	{ MUA, 0, "\\b[^A]", "A_B#" },
	{ MA, 0, "\\b\\W", "\n*" },
	{ MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
	{ MAP, 0, "\\B", "_\xa1" },
	{ MAP, 0, "\\b_\\b[,A]\\B", "_," },
	{ MUAP, 0, "\\b", "\xe6\x92\xad!" },
	{ MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
	{ MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
	{ MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
	{ MUA, 0, "\\b.", "\xcd\xbe" },
	{ CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
	{ MA, 0, "\\R^", "\n" },
	{ MA, 1, "^", "\n" },
	{ 0, 0, "^ab", "ab" },
	{ 0, 0, "^ab", "aab" },
	{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
	{ PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
	{ 0, 0, "ab$", "ab" },
	{ 0, 0, "ab$", "ab\r\n" },
	{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
	{ PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
	{ PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
	{ PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
	{ PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
	{ PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },
	{ PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
	{ PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
	{ PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
	{ PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
	{ PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
	{ PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
	{ PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
	{ PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
	/* NOTE(review): the next four entries repeat the first four
	   PCRE_NEWLINE_ANYCRLF entries above verbatim; possibly
	   PCRE_NEWLINE_ANY was intended - confirm. */
	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
	{ PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
	{ PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
	{ MA, 0, "\\Aa", "aaa" },
	{ MA, 1, "\\Aa", "aaa" },
	{ MA, 1, "\\Ga", "aaa" },
	{ MA, 1, "\\Ga", "aba" },
	{ MA, 0, "a\\z", "aaa" },
	{ MA, 0, "a\\z", "aab" },

	/* Brackets. */
	{ MUA, 0, "(ab|bb|cd)", "bacde" },
	{ MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
	{ MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
	{ CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
	{ MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
	{ MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },

	/* Greedy and non-greedy ? operators. */
	{ MUA, 0, "(?:a)?a", "laab" },
	{ CMUA, 0, "(A)?A", "llaab" },
	{ MUA, 0, "(a)?\?a", "aab" }, /* The second ? is escaped because ?? is the prefix of trigraphs. */
	{ MUA, 0, "(a)?a", "manm" },
	{ CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
	{ MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
	{ MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },

	/* Greedy and non-greedy + operators */
	{ MUA, 0, "(aa)+aa", "aaaaaaa" },
	{ MUA, 0, "(aa)+?aa", "aaaaaaa" },
	{ MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
	{ MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
	{ MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
	{ MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
	{ MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },

	/* Greedy and non-greedy * operators */
	{ CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
	{ MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
	{ MUA, 0, "(aa|ab)*ab", "aaabaaab" },
	{ CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
	{ MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
	{ MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
	{ MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
	{ MA, 0, "((?:a|)*){0}a", "a" },

	/* Combining ? + * operators */
	{ MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
	{ MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
	{ MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
	{ MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
	{ MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },

	/* Single character iterators. */
	{ MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
	{ MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
	{ MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
	{ MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
	{ MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
	{ MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
	{ MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
	{ MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
	{ MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
	{ MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
	{ MUA, 0, "(a?+[^b])+", "babaacacb" },
	{ MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
	{ CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
	{ CMUA, 0, "[c-f]+k", "DemmFke" },
	{ MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
	{ MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
	{ CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
	{ CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
	{ CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
	{ CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
	{ MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
	{ CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
	{ MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
	{ MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
	{ MUA, 0, "\\b\\w+\\B", "x,a_cd" },
	{ MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
	{ CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
	{ CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },

	/* Basic character sets. */
	{ MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
	{ MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
	{ MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
	{ MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
	{ MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
	{ MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },

	/* Unicode properties. */
	{ MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
	{ MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
	{ MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
	{ MUAP, 0, "[\\P{Any}]", "abc" },
	{ MUAP, 0, "[^\\p{Any}]", "abc" },
	{ MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
	{ MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
	{ MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
	{ MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
	{ MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
	{ MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
	{ MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
	{ CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
	{ MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
	{ MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
	{ MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
	{ CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
	{ MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
	{ MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
	{ PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },

	/* Possible empty brackets. */
	{ MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
	{ MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
	{ MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
	{ MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
	{ MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
	{ MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
	{ MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
	{ MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
	{ MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
	{ MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },

	/* Start offset. */
	{ MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
	{ MUA, 4, "(\\w\\W\\w)+", "ab#d" },
	{ MUA, 2, "(\\w\\W\\w)+", "ab#d" },
	{ MUA, 1, "(\\w\\W\\w)+", "ab#d" },

	/* Newline. */
	{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
	{ PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
	{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },

	/* Any character except newline or any newline. */
	{ PCRE_NEWLINE_CRLF, 0, ".", "\r" },
	{ PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
	{ PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
	{ PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },
	{ PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
	{ PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
	{ PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
	{ PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
	{ PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
	{ PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
	{ PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
	{ MUA, 0, "\\R+", "ab" },
	{ MUA, 0, "\\R+", "ab\r\n\r" },
	{ MUA, 0, "\\R*", "ab\r\n\r" },
	{ MUA, 0, "\\R*", "\r\n\r" },
	{ MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
	{ MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
	{ MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
	{ MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
	{ MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },
	{ MUA, 0, "\\R+\\R\\R", "\r\r\r" },
	{ MUA, 0, "\\R*\\R\\R", "\n\r" },
	{ MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },
	{ MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },

	/* Atomic groups (no fallback from "next" direction). */
	{ MUA, 0, "(?>ab)ab", "bab" },
	{ MUA, 0, "(?>(ab))ab", "bab" },
	{ MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
	"bababcdedefgheijijklmlmnop" },
	{ MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
	{ MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
	{ MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
	{ MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
	{ MUA, 0, "((?>a|)+?)b", "aaacaaab" },
	{ MUA, 0, "(?>x|)*$", "aaa" },
	{ MUA, 0, "(?>(x)|)*$", "aaa" },
	{ MUA, 0, "(?>x|())*$", "aaa" },
	{ MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
	{ MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
	{ MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
	{ MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
	{ MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
	{ MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
	{ MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
	{ MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
	{ MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
	{ MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
	{ MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
	{ MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
	{ MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
	{ MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
	{ CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
	{ MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
	{ MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
	{ MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },
	{ MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
	{ MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
	{ MUA, 0, "\\X{2,4}", "abcdef" },
	{ MUA, 0, "\\X{2,4}?", "abcdef" },
	{ MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },
	{ MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
	{ MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
	{ MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },

	/* Possessive quantifiers. */
	{ MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
	{ MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
	{ MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
	{ MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
	{ MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
	{ MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
	{ MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
	{ MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
	{ MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
	{ MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
	{ MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
	{ MUA, 0, "(b*)++m", "bxbbxbbbxm" },
	{ MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
	{ MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
	{ MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
	{ MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
	{ MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
	{ MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
	{ MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
	{ MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
	{ MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
	{ MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
	{ MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
	{ MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
	{ MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
	{ MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
	{ MUA, 0, "((b*))++m", "bxbbxbbbxm" },
	{ MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
	{ MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
	{ MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
	{ MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
	{ MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
	{ MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
	{ MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
	{ MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },

	/* Back references. */
	{ MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
	{ CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
	{ CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
	{ MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
	{ MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
	{ MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
	{ MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
	{ MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
	{ MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
	{ CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
	{ MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
	{ CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
	{ MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
	{ CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
	{ MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
	{ MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
	{ MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
	{ MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
	{ MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
	{ MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
	{ MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
	{ PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
	{ CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },

	/* Assertions. */
	{ MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
	{ MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
	{ MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
	{ MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
	{ MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
	{ MA, 0, "(?<=aaa|aa|a)a", "aaa" },
	{ MA, 2, "(?<=aaa|aa|a)a", "aaa" },
	{ MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
	{ MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
	{ MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
	{ MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
	{ MUA, 0, "((?(?=(a))a)+k)", "bbak" },
	{ MUA, 0, "((?(?=a)a)+k)", "bbak" },
	{ MUA, 0, "(?=(?>(a))m)amk", "a k" },
	{ MUA, 0, "(?!(?>(a))m)amk", "a k" },
	{ MUA, 0, "(?>(?=(a))am)amk", "a k" },
	{ MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
	{ MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
	{ MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
	{ MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
	{ MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
	{ MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
	{ MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
	{ MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },

	/* Not empty, ACCEPT, FAIL */
	{ MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },
	{ MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
	{ MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
	{ MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
	{ MUA, 0, "a(*ACCEPT)b", "ab" },
	{ MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },
	{ MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
	{ MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
	{ MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },
	{ MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
	{ MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
	{ MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
	{ MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },
	{ MUA, 0, "((a(*ACCEPT)b))", "ab" },
	{ MUA, 0, "(a(*FAIL)a|a)", "aaa" },
	{ MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
	{ MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
	{ MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
	{ MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },

	/* Conditional blocks. */
	{ MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
	{ MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
	{ MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
	{ MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
	{ MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
	{ MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
	{ MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
	{ MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
	{ MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
	{ MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
	{ MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
	{ MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
	{ MUA, 0, "(?(?=a)ab)", "a" },
	{ MUA, 0, "(?(?<!b)c)", "b" },
	{ MUA, 0, "(?(DEFINE)a(b))", "a" },
	{ MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
	{ MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
	{ MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
	{ MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
	{ MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
	{ MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
	{ MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
	{ MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
	{ MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
	{ MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
	{ MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
	{ MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
	{ MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
	{ MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
	{ MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
	{ MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
	{ MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
	{ MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
	{ MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },

	/* Set start of match. */
	{ MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
	{ MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
	{ MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
	{ MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },
	{ MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },

	/* First line. */
	{ MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
	{ MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },
	{ MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
	{ MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },
	{ MUA | PCRE_FIRSTLINE, 0, "a", "\na" },
	{ MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },
	{ MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },
	{ MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },
	{ PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },
	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },

	/* Recurse. */
	{ MUA, 0, "(a)(?1)", "aa" },
	{ MUA, 0, "((a))(?1)", "aa" },
	{ MUA, 0, "(b|a)(?1)", "aa" },
	{ MUA, 0, "(b|(a))(?1)", "aa" },
	{ MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },
	{ MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
	{ MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
	{ MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
	{ MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
	{ MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
	{ MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
	{ MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
	{ MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
	{ MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
	{ MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
	{ MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
	{ MUA, 0, "b|<(?R)*>", "<<b>" },
	{ MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
	{ MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
	{ MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
	{ MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
	{ MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
	{ MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
	{ MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
	{ MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },

	/* Deep recursion. */
	{ MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
	{ MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
	{ MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },

	/* Deep recursion: Stack limit reached. */
	{ MA, 0, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
	{ MA, 0, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
	{ MA, 0, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
	{ MA, 0, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
	{ MA, 0, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },

	/* End of table marker. */
	{ 0, 0, NULL, NULL }
};
618
619 pcre_jit_stack* callback(void *arg)
620 {
621 return (pcre_jit_stack *)arg;
622 }
623
624 static void setstack(pcre_extra *extra, int realloc)
625 {
626 static pcre_jit_stack *stack;
627
628 if (realloc) {
629 if (stack)
630 pcre_jit_stack_free(stack);
631 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
632 }
633 /* Extra can be NULL. */
634 pcre_assign_jit_stack(extra, callback, stack);
635 }
636
637 #ifdef SUPPORT_PCRE16
638
639 static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length)
640 {
641 unsigned char *ptr = (unsigned char*)input;
642 PCRE_SCHAR16 *optr = output;
643 unsigned int c;
644
645 if (max_length == 0)
646 return 0;
647
648 while (*ptr && max_length > 1) {
649 c = 0;
650 if (offsetmap)
651 *offsetmap++ = (int)(ptr - (unsigned char*)input);
652
653 if (!(*ptr & 0x80))
654 c = *ptr++;
655 else if (!(*ptr & 0x20)) {
656 c = ((ptr[0] & 0x1f) << 6) | (ptr[1] & 0x3f);
657 ptr += 2;
658 } else if (!(*ptr & 0x10)) {
659 c = ((ptr[0] & 0x0f) << 12) | ((ptr[1] & 0x3f) << 6) | (ptr[2] & 0x3f);
660 ptr += 3;
661 } else if (!(*ptr & 0x08)) {
662 c = ((ptr[0] & 0x07) << 18) | ((ptr[1] & 0x3f) << 12) | ((ptr[2] & 0x3f) << 6) | (ptr[3] & 0x3f);
663 ptr += 4;
664 }
665
666 if (c < 65536) {
667 *optr++ = c;
668 max_length--;
669 } else if (max_length <= 2) {
670 *optr = '\0';
671 return optr - output;
672 } else {
673 c -= 0x10000;
674 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
675 *optr++ = 0xdc00 | (c & 0x3ff);
676 max_length -= 2;
677 if (offsetmap)
678 offsetmap++;
679 }
680 }
681 if (offsetmap)
682 *offsetmap = (int)(ptr - (unsigned char*)input);
683 *optr = '\0';
684 return optr - output;
685 }
686
687 static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length)
688 {
689 PCRE_SCHAR16 *optr = output;
690
691 if (max_length == 0)
692 return 0;
693
694 while (*input && max_length > 1) {
695 *optr++ = *input++;
696 max_length--;
697 }
698 *optr = '\0';
699 return optr - output;
700 }
701
702 #define REGTEST_MAX_LENGTH 4096
703 static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
704 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
705
706 #endif /* SUPPORT_PCRE16 */
707
708 static int regression_tests(void)
709 {
710 struct regression_test_case *current = regression_test_cases;
711 const char *error;
712 int i, err_offs, is_succesful;
713 int total = 0;
714 int succesful = 0;
715 int counter = 0;
716 #ifdef SUPPORT_PCRE8
717 pcre *re8;
718 pcre_extra *extra8;
719 int ovector8_1[32];
720 int ovector8_2[32];
721 int return_value8_1, return_value8_2;
722 int utf8 = 0, ucp8 = 0;
723 int disabled_flags8 = PCRE_BUG;
724 #endif
725 #ifdef SUPPORT_PCRE16
726 pcre *re16;
727 pcre_extra *extra16;
728 int ovector16_1[32];
729 int ovector16_2[32];
730 int return_value16_1, return_value16_2;
731 int utf16 = 0, ucp16 = 0;
732 int disabled_flags16 = PCRE_BUG;
733 int length16;
734 #endif
735
736 /* This test compares the behaviour of interpreter and JIT. Although disabling
737 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
738 still considered successful from pcre_jit_test point of view. */
739
740 printf("Running JIT regression\n");
741
742 #ifdef SUPPORT_PCRE8
743 pcre_config(PCRE_CONFIG_UTF8, &utf8);
744 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
745 if (!utf8)
746 disabled_flags8 |= PCRE_UTF8;
747 if (!ucp8)
748 disabled_flags8 |= PCRE_UCP;
749 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
750 #endif
751 #ifdef SUPPORT_PCRE16
752 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
753 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
754 if (!utf16)
755 disabled_flags16 |= PCRE_UTF8;
756 if (!ucp16)
757 disabled_flags16 |= PCRE_UCP;
758 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
759 #endif
760
761 while (current->pattern) {
762 /* printf("\nPattern: %s :\n", current->pattern); */
763 total++;
764
765 error = NULL;
766 #ifdef SUPPORT_PCRE8
767 re8 = pcre_compile(current->pattern,
768 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
769 &error, &err_offs, NULL);
770
771 if (re8) {
772 error = NULL;
773 extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
774 if (!extra8) {
775 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
776 pcre_free(re8);
777 re8 = NULL;
778 }
779 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
780 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
781 pcre_free_study(extra8);
782 pcre_free(re8);
783 re8 = NULL;
784 }
785 } else if (utf8 && ucp8)
786 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
787 #endif
788 #ifdef SUPPORT_PCRE16
789 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
790 re16 = pcre16_compile(regtest_buf,
791 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
792 &error, &err_offs, NULL);
793 if (re16) {
794 error = NULL;
795 extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
796 if (!extra16) {
797 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
798 pcre_free(re16);
799 re16 = NULL;
800 }
801 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
802 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
803 pcre_free_study(extra16);
804 pcre_free(re16);
805 re16 = NULL;
806 }
807 } else if (utf16 && ucp16)
808 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
809 #endif
810
811 counter++;
812 if ((counter & 0x3) != 0)
813 setstack(NULL, 1);
814
815 #ifdef SUPPORT_PCRE8
816 if (re8) {
817 setstack(extra8, 0);
818 for (i = 0; i < 32; ++i)
819 ovector8_1[i] = -2;
820 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset,
821 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
822
823 for (i = 0; i < 32; ++i)
824 ovector8_2[i] = -2;
825 return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset,
826 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
827 }
828 #endif
829
830 #ifdef SUPPORT_PCRE16
831 if (re16) {
832 setstack(extra16, 0);
833 if (current->flags & PCRE_UTF8)
834 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
835 else
836 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
837
838 for (i = 0; i < 32; ++i)
839 ovector16_1[i] = -2;
840 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset,
841 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
842
843 for (i = 0; i < 32; ++i)
844 ovector16_2[i] = -2;
845 return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset,
846 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
847 }
848 #endif
849
850 /* If PCRE_BUG is set, just run the test, but do not compare the results.
851 Segfaults can still be captured. */
852
853 is_succesful = 1;
854 if (!(current->flags & PCRE_BUG)) {
855 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
856 if ((current->flags & PCRE_UTF8) && utf8 && utf16) {
857 /* All results must be the same. */
858 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
859 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
860 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
861 total, current->pattern, current->input);
862 is_succesful = 0;
863 } else if (return_value8_1 >= 0) {
864 return_value8_1 *= 2;
865 /* Transform back the results. */
866 for (i = 0; i < return_value8_1; ++i) {
867 if (ovector16_1[i] >= 0)
868 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
869 if (ovector16_2[i] >= 0)
870 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
871 }
872
873 for (i = 0; i < return_value8_1; ++i)
874 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
875 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
876 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
877 total, current->pattern, current->input);
878 is_succesful = 0;
879 }
880 }
881 } else {
882 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
883 /* Only the 8 bit and 16 bit results must be equal. */
884 #ifdef SUPPORT_PCRE8
885 if (return_value8_1 != return_value8_2) {
886 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
887 return_value8_1, return_value8_2, total, current->pattern, current->input);
888 is_succesful = 0;
889 } else if (return_value8_1 >= 0) {
890 return_value8_1 *= 2;
891 for (i = 0; i < return_value8_1; ++i)
892 if (ovector8_1[i] != ovector8_2[i]) {
893 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n",
894 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
895 is_succesful = 0;
896 }
897 }
898 #endif
899
900 #ifdef SUPPORT_PCRE16
901 if (return_value16_1 != return_value16_2) {
902 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
903 return_value16_1, return_value16_2, total, current->pattern, current->input);
904 is_succesful = 0;
905 } else if (return_value16_1 >= 0) {
906 return_value16_1 *= 2;
907 for (i = 0; i < return_value16_1; ++i)
908 if (ovector16_1[i] != ovector16_2[i]) {
909 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n",
910 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
911 is_succesful = 0;
912 }
913 }
914 #endif
915
916 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
917 }
918 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
919 }
920
921 if (is_succesful)
922 succesful++;
923
924 #ifdef SUPPORT_PCRE8
925 if (re8) {
926 pcre_free_study(extra8);
927 pcre_free(re8);
928 }
929 #endif
930 #ifdef SUPPORT_PCRE16
931 if (re16) {
932 pcre16_free_study(extra16);
933 pcre_free(re16);
934 }
935 #endif
936
937 /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
938 printf(".");
939 fflush(stdout);
940 current++;
941 }
942
943 if (total == succesful) {
944 printf("\nAll JIT regression tests are successfully passed.\n");
945 return 0;
946 } else {
947 printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
948 return 1;
949 }
950 }
951
952
953 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5