/[pcre]/code/branches/pcre16/pcre_jit_test.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 789 - (show annotations)
Wed Dec 7 14:36:26 2011 UTC (7 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 40801 byte(s)
Error occurred while calculating annotation data.
UTF16 fixes: iterated character parsing, named references
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
/* Extra flag bit that some entries of regression_test_cases[] OR into their
   flags next to the regular PCRE_* options.
   NOTE(review): presumably marks cases affected by a known bug so that the
   test driver can treat them specially - confirm in regression_tests(). */
51 #define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 Newlines:
60 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
61 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62 Othercase pairs:
63 \xc3\xa9 = 0xe9 = 233 (e')
64 \xc3\x89 = 0xc9 = 201 (E')
65 \xc3\xa1 = 0xe1 = 225 (a')
66 \xc3\x81 = 0xc1 = 193 (A')
67 \xc8\xba = 0x23a = 570
68 \xe2\xb1\xa5 = 0x2c65 = 11365
69 \xe1\xbd\xb8 = 0x1f78 = 8056
70 \xe1\xbf\xb8 = 0x1ff8 = 8184
71 \xf0\x90\x90\x80 = 0x10400 = 66560
72 \xf0\x90\x90\xa8 = 0x10428 = 66600
73 Mark property:
74 \xcc\x8d = 0x30d = 781
75 Special:
76 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
77 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
78 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
79 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
80 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
81 */
82
83 static int regression_tests(void);
84
85 int main(void)
86 {
87 int jit = 0;
88 pcre_config(PCRE_CONFIG_JIT, &jit);
89 if (!jit) {
90 printf("JIT must be enabled to run pcre_jit_test\n");
91 return 1;
92 }
93 return regression_tests();
94 }
95
96 /* --------------------------------------------------------------------------------------- */
97
98 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
99 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
100 #endif
101
/* Shorthand option sets used by the test table. The letters of each name
   spell out the options that are combined:
     C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
     A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
102 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
103 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
104 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
105 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
106 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
107 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
108 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
109
/* One entry of the regression test table: a pattern, a subject string and
   the settings to use with them. */
110 struct regression_test_case {
/* PCRE_* option bits (see the MUA, CMA, ... shorthands), optionally OR-ed
   with PCRE_BUG. */
111 int flags;
/* Offset into the subject string.
   NOTE(review): presumably the position where matching starts - confirm
   against the test driver. */
112 int start_offset;
/* Regular expression text; NULL in the entry that terminates the table. */
113 const char *pattern;
/* Subject string for the pattern; NULL in the terminating entry. */
114 const char *input;
115 };
116
/* Table of regression test cases, grouped by the regular expression feature
   they exercise (see the section comments). Each entry lists the option
   flags, the start offset, the pattern and the subject string. Non-ASCII
   characters are written as hexadecimal byte escapes. Inside the string
   literals "?\?" is written instead of two consecutive question marks so
   that the compiler cannot mistake them for the start of a trigraph.
   The table ends with an entry whose pattern and input are NULL. */
117 static struct regression_test_case regression_test_cases[] = {
118 /* Constant strings. */
119 { MUA, 0, "AbC", "AbAbC" },
120 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
121 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
122 { MA, 0, "[^a]", "aAbB" },
123 { CMA, 0, "[^m]", "mMnN" },
124 { MA, 0, "a[^b][^#]", "abacd" },
125 { CMA, 0, "A[^B][^E]", "abacd" },
126 { CMUA, 0, "[^x][^#]", "XxBll" },
127 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
128 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
129 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
130 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
131 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
132 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
133 { MUA, 0, "[axd]", "sAXd" },
134 { CMUA, 0, "[axd]", "sAXd" },
135 { CMUA, 0, "[^axd]", "DxA" },
136 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
137 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
138 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
139 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
140 { MUA, 0, "[^a]", "\xc2\x80[]" },
141 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
142 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
143 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
144 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
145 { PCRE_CASELESS, 0, "a1", "Aa1" },
146 { MA, 0, "\\Ca", "cda" },
147 { CMA, 0, "\\Ca", "CDA" },
148 { MA, 0, "\\Cx", "cda" },
149 { CMA, 0, "\\Cx", "CDA" },
150 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
151 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
152 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
153 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
154
155 /* Assertions. */
156 { MUA, 0, "\\b[^A]", "A_B#" },
157 { MA, 0, "\\b\\W", "\n*" },
158 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
159 { MAP, 0, "\\B", "_\xa1" },
160 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
161 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
162 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
163 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
164 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
165 { MUA, 0, "\\b.", "\xcd\xbe" },
166 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
167 { MA, 0, "\\R^", "\n" },
168 { MA, 1, "^", "\n" },
169 { 0, 0, "^ab", "ab" },
170 { 0, 0, "^ab", "aab" },
171 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
172 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
173 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
174 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
175 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
176 { 0, 0, "ab$", "ab" },
177 { 0, 0, "ab$", "ab\r\n" },
178 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
179 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
180 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
181 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
182 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
183 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
184 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
185 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },
186 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
187 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
188 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
189 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
190 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
191 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
192 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
193 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
194 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
195 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
196 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
197 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
198 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
199 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
200 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
201 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
202 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
203 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
204 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
205 { MA, 0, "\\Aa", "aaa" },
206 { MA, 1, "\\Aa", "aaa" },
207 { MA, 1, "\\Ga", "aaa" },
208 { MA, 1, "\\Ga", "aba" },
209 { MA, 0, "a\\z", "aaa" },
210 { MA, 0, "a\\z", "aab" },
211
212 /* Brackets. */
213 { MUA, 0, "(ab|bb|cd)", "bacde" },
214 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
215 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
216 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
217 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
218 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
219
220 /* Greedy and non-greedy ? operators. */
221 { MUA, 0, "(?:a)?a", "laab" },
222 { CMUA, 0, "(A)?A", "llaab" },
223 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
224 { MUA, 0, "(a)?a", "manm" },
225 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
226 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
227 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
228
229 /* Greedy and non-greedy + operators */
230 { MUA, 0, "(aa)+aa", "aaaaaaa" },
231 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
232 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
233 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
234 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
235 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
236 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
237
238 /* Greedy and non-greedy * operators */
239 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
240 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
241 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
242 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
243 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
244 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
245 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
246 { MA, 0, "((?:a|)*){0}a", "a" },
247
248 /* Combining ? + * operators */
249 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
250 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
251 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
252 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
253 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
254
255 /* Single character iterators. */
256 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
257 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
258 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
259 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
260 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
261 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
262 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
263 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
264 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
265 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
266 { MUA, 0, "(a?+[^b])+", "babaacacb" },
267 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
268 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
269 { CMUA, 0, "[c-f]+k", "DemmFke" },
270 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
271 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
272 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
273 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
274 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
275 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
276 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
277 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
278 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
279 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
280 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
281 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
282 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
283 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
284
285 /* Basic character sets. */
286 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
287 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
288 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
289 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
290 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
291 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
292
293 /* Unicode properties. */
294 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
295 { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
296 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
297 { MUAP, 0, "[\\P{Any}]", "abc" },
298 { MUAP, 0, "[^\\p{Any}]", "abc" },
299 { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
300 { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
301 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
302 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
303 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
304 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
305 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
306 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
307 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
308 { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
309 { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
310 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
311 { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
312 { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
313 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
314
315 /* Possible empty brackets. */
316 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
317 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
318 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
319 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
320 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
321 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
322 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
323 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
324 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
325 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
326
327 /* Start offset. */
328 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
329 { MUA, 4, "(\\w\\W\\w)+", "ab#d" },
330 { MUA, 2, "(\\w\\W\\w)+", "ab#d" },
331 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
332
333 /* Newline. */
334 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
335 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
336 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
337
338 /* Any character except newline or any newline. */
339 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
340 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
341 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
342 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
343 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
344 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },
345 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
346 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
347 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
348 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
349 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
350 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
351 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
352 { MUA, 0, "\\R+", "ab" },
353 { MUA, 0, "\\R+", "ab\r\n\r" },
354 { MUA, 0, "\\R*", "ab\r\n\r" },
355 { MUA, 0, "\\R*", "\r\n\r" },
356 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
357 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
358 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
359 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
360 { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },
361 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
362 { MUA, 0, "\\R*\\R\\R", "\n\r" },
363 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },
364 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
365
366 /* Atomic groups (no fallback from "next" direction). */
367 { MUA, 0, "(?>ab)ab", "bab" },
368 { MUA, 0, "(?>(ab))ab", "bab" },
369 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
370 "bababcdedefgheijijklmlmnop" },
371 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
372 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
373 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
374 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
375 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
376 { MUA, 0, "(?>x|)*$", "aaa" },
377 { MUA, 0, "(?>(x)|)*$", "aaa" },
378 { MUA, 0, "(?>x|())*$", "aaa" },
379 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
380 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
381 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
382 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
383 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
384 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
385 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
386 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
387 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
388 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
389 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
390 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
391 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
392 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
393 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
394 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
395 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
396 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },
397 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
398 { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
399 { MUA, 0, "\\X{2,4}", "abcdef" },
400 { MUA, 0, "\\X{2,4}?", "abcdef" },
401 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },
402 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
403 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
404 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
405
406 /* Possessive quantifiers. */
407 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
408 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
409 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
410 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
411 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
412 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
413 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
414 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
415 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
416 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
417 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
418 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
419 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
420 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
421 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
422 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
423 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
424 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
425 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
426 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
427 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
428 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
429 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
430 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
431 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
432 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
433 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
434 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
435 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
436 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
437 { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
438 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
439 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
440 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
441 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
442
443 /* Back references. */
444 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
445 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
446 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
447 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
448 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
449 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
450 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
451 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
452 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
453 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
454 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
455 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
456 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
457 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
458 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
459 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
460 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
461 { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
462 { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
463 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
464 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
465 { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
466 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
467
468 /* Assertions. */
469 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
470 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
471 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
472 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
473 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
474 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
475 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
476 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
477 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
478 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
479 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
480 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
481 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
482 { MUA, 0, "(?=(?>(a))m)amk", "a k" },
483 { MUA, 0, "(?!(?>(a))m)amk", "a k" },
484 { MUA, 0, "(?>(?=(a))am)amk", "a k" },
485 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
486 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
487 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
488 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
489 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
490 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
491 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
492 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
493
494 /* Not empty, ACCEPT, FAIL */
495 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },
496 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
497 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
498 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
499 { MUA, 0, "a(*ACCEPT)b", "ab" },
500 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },
501 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
502 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
503 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },
504 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
505 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
506 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
507 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },
508 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
509 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
510 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
511 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
512 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
513 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
514
515 /* Conditional blocks. */
516 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
517 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
518 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
519 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
520 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
521 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
522 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
523 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
524 { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
525 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
526 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
527 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
528 { MUA, 0, "(?(?=a)ab)", "a" },
529 { MUA, 0, "(?(?<!b)c)", "b" },
530 { MUA, 0, "(?(DEFINE)a(b))", "a" },
531 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
532 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
533 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
534 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
535 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
536 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
537 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
538 { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
539 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
540 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
541 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
542 { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
543 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
544 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
545 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
546 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
547 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
548 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
549 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
550
551 /* Set start of match. */
552 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
553 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
554 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
555 { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },
556 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
557
558 /* First line. */
559 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
560 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },
561 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
562 { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },
563 { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },
564 { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },
565 { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },
566 { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },
567 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },
568 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },
569 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },
570 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
571 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
572 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
573 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },
574 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },
575 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
576
577 /* Recurse. */
578 { MUA, 0, "(a)(?1)", "aa" },
579 { MUA, 0, "((a))(?1)", "aa" },
580 { MUA, 0, "(b|a)(?1)", "aa" },
581 { MUA, 0, "(b|(a))(?1)", "aa" },
582 { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },
583 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
584 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
585 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
586 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
587 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
588 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
589 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
590 { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
591 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
592 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
593 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
594 { MUA, 0, "b|<(?R)*>", "<<b>" },
595 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
596 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
597 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
598 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
599 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
600 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
601 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
602 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
603
604 /* Deep recursion. */
605 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
606 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
607 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
608
609 /* Deep recursion: Stack limit reached. */
610 { MA, 0, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
611 { MA, 0, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
612 { MA, 0, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
613 { MA, 0, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
614 { MA, 0, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
615
616 { 0, 0, NULL, NULL }
617 };
618
619 pcre_jit_stack* callback(void *arg)
620 {
621 return (pcre_jit_stack *)arg;
622 }
623
624 static void setstack(pcre_extra *extra, int alloc_again)
625 {
626 static pcre_jit_stack *stack;
627
628 if (alloc_again) {
629 if (stack)
630 pcre_jit_stack_free(stack);
631 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
632 }
633 /* Extra can be NULL. */
634 pcre_assign_jit_stack(extra, callback, stack);
635 }
636
637 #ifdef SUPPORT_PCRE16
638
639 static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length)
640 {
641 unsigned char *iptr = (unsigned char*)input;
642 unsigned short *optr = (unsigned short *)output;
643 unsigned int c;
644
645 if (max_length == 0)
646 return 0;
647
648 while (*iptr && max_length > 1) {
649 c = 0;
650 if (offsetmap)
651 *offsetmap++ = (int)(iptr - (unsigned char*)input);
652
653 if (!(*iptr & 0x80))
654 c = *iptr++;
655 else if (!(*iptr & 0x20)) {
656 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
657 iptr += 2;
658 } else if (!(*iptr & 0x10)) {
659 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
660 iptr += 3;
661 } else if (!(*iptr & 0x08)) {
662 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
663 iptr += 4;
664 }
665
666 if (c < 65536) {
667 *optr++ = c;
668 max_length--;
669 } else if (max_length <= 2) {
670 *optr = '\0';
671 return (int)(optr - (unsigned short *)output);
672 } else {
673 c -= 0x10000;
674 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
675 *optr++ = 0xdc00 | (c & 0x3ff);
676 max_length -= 2;
677 if (offsetmap)
678 offsetmap++;
679 }
680 }
681 if (offsetmap)
682 *offsetmap = (int)(iptr - (unsigned char*)input);
683 *optr = '\0';
684 return (int)(optr - (unsigned short *)output);
685 }
686
687 static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length)
688 {
689 unsigned char *iptr = (unsigned char*)input;
690 unsigned short *optr = (unsigned short *)output;
691
692 if (max_length == 0)
693 return 0;
694
695 while (*iptr && max_length > 1) {
696 *optr++ = *iptr++;
697 max_length--;
698 }
699 *optr = '\0';
700 return (int)(optr - (unsigned short *)output);
701 }
702
/* Number of elements in each of the two scratch arrays below. */
703 #define REGTEST_MAX_LENGTH 4096
/* Scratch buffer for the 16 bit tests: regression_tests() first stores the
   16 bit form of the pattern here, and later the 16 bit form of the input. */
704 static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
/* Filled by convert_utf8_to_utf16() for the input string; regression_tests()
   uses it to translate the offsets reported by the 16 bit match back to
   offsets in the 8 bit input. */
705 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
706
707 #endif /* SUPPORT_PCRE16 */
708
709 static int regression_tests(void)
710 {
711 struct regression_test_case *current = regression_test_cases;
712 const char *error;
713 int i, err_offs, is_succesful;
714 int total = 0;
715 int succesful = 0;
716 int counter = 0;
717 #ifdef SUPPORT_PCRE8
718 pcre *re8;
719 pcre_extra *extra8;
720 int ovector8_1[32];
721 int ovector8_2[32];
722 int return_value8_1, return_value8_2;
723 int utf8 = 0, ucp8 = 0;
724 int disabled_flags8 = PCRE_BUG;
725 #endif
726 #ifdef SUPPORT_PCRE16
727 pcre *re16;
728 pcre_extra *extra16;
729 int ovector16_1[32];
730 int ovector16_2[32];
731 int return_value16_1, return_value16_2;
732 int utf16 = 0, ucp16 = 0;
733 int disabled_flags16 = PCRE_BUG;
734 int length16;
735 #endif
736
737 /* This test compares the behaviour of interpreter and JIT. Although disabling
738 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
739 still considered successful from pcre_jit_test point of view. */
740
741 printf("Running JIT regression\n");
742
743 #ifdef SUPPORT_PCRE8
744 pcre_config(PCRE_CONFIG_UTF8, &utf8);
745 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
746 if (!utf8)
747 disabled_flags8 |= PCRE_UTF8;
748 if (!ucp8)
749 disabled_flags8 |= PCRE_UCP;
750 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
751 #endif
752 #ifdef SUPPORT_PCRE16
753 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
754 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
755 if (!utf16)
756 disabled_flags16 |= PCRE_UTF8;
757 if (!ucp16)
758 disabled_flags16 |= PCRE_UCP;
759 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
760 #endif
761
762 while (current->pattern) {
763 /* printf("\nPattern: %s :\n", current->pattern); */
764 total++;
765
766 error = NULL;
767 #ifdef SUPPORT_PCRE8
768 re8 = pcre_compile(current->pattern,
769 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
770 &error, &err_offs, NULL);
771
772 extra8 = NULL;
773 if (re8) {
774 error = NULL;
775 extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
776 if (!extra8) {
777 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
778 pcre_free(re8);
779 re8 = NULL;
780 }
781 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
782 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
783 pcre_free_study(extra8);
784 pcre_free(re8);
785 re8 = NULL;
786 }
787 } else if (utf8 && ucp8)
788 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
789 #endif
790 #ifdef SUPPORT_PCRE16
791 if (current->flags & PCRE_UTF8)
792 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
793 else
794 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
795 re16 = pcre16_compile(regtest_buf,
796 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
797 &error, &err_offs, NULL);
798
799 extra16 = NULL;
800 if (re16) {
801 error = NULL;
802 extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
803 if (!extra16) {
804 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
805 pcre_free(re16);
806 re16 = NULL;
807 }
808 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
809 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
810 pcre_free_study(extra16);
811 pcre_free(re16);
812 re16 = NULL;
813 }
814 } else if (utf16 && ucp16)
815 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
816 #endif
817
818 counter++;
819 if ((counter & 0x3) != 0)
820 setstack(NULL, 1);
821
822 #ifdef SUPPORT_PCRE8
823 return_value8_1 = -1000;
824 return_value8_2 = -1000;
825 if (re8) {
826 setstack(extra8, 0);
827 for (i = 0; i < 32; ++i)
828 ovector8_1[i] = -2;
829 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset,
830 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
831
832 for (i = 0; i < 32; ++i)
833 ovector8_2[i] = -2;
834 return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset,
835 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
836 }
837 #endif
838
839 #ifdef SUPPORT_PCRE16
840 return_value16_1 = -1000;
841 return_value16_2 = -1000;
842 if (re16) {
843 setstack(extra16, 0);
844 if (current->flags & PCRE_UTF8)
845 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
846 else
847 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
848
849 for (i = 0; i < 32; ++i)
850 ovector16_1[i] = -2;
851 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset,
852 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
853
854 for (i = 0; i < 32; ++i)
855 ovector16_2[i] = -2;
856 return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset,
857 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
858 }
859 #endif
860
861 /* If PCRE_BUG is set, just run the test, but do not compare the results.
862 Segfaults can still be captured. */
863
864 is_succesful = 1;
865 if (!(current->flags & PCRE_BUG)) {
866 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
867 if (utf8 == utf16) {
868 /* All results must be the same. */
869 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
870 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
871 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
872 total, current->pattern, current->input);
873 is_succesful = 0;
874 } else if (return_value8_1 >= 0) {
875 return_value8_1 *= 2;
876 /* Transform back the results. */
877 if (current->flags & PCRE_UTF8) {
878 for (i = 0; i < return_value8_1; ++i) {
879 if (ovector16_1[i] >= 0)
880 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
881 if (ovector16_2[i] >= 0)
882 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
883 }
884 }
885
886 for (i = 0; i < return_value8_1; ++i)
887 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
888 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
889 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
890 total, current->pattern, current->input);
891 is_succesful = 0;
892 }
893 }
894 } else {
895 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
896 /* Only the 8 bit and 16 bit results must be equal. */
897 #ifdef SUPPORT_PCRE8
898 if (return_value8_1 != return_value8_2) {
899 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
900 return_value8_1, return_value8_2, total, current->pattern, current->input);
901 is_succesful = 0;
902 } else if (return_value8_1 >= 0) {
903 return_value8_1 *= 2;
904 for (i = 0; i < return_value8_1; ++i)
905 if (ovector8_1[i] != ovector8_2[i]) {
906 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n",
907 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
908 is_succesful = 0;
909 }
910 }
911 #endif
912
913 #ifdef SUPPORT_PCRE16
914 if (return_value16_1 != return_value16_2) {
915 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
916 return_value16_1, return_value16_2, total, current->pattern, current->input);
917 is_succesful = 0;
918 } else if (return_value16_1 >= 0) {
919 return_value16_1 *= 2;
920 for (i = 0; i < return_value16_1; ++i)
921 if (ovector16_1[i] != ovector16_2[i]) {
922 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n",
923 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
924 is_succesful = 0;
925 }
926 }
927 #endif
928
929 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
930 }
931 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
932 }
933
934 if (is_succesful)
935 succesful++;
936
937 #ifdef SUPPORT_PCRE8
938 if (re8) {
939 pcre_free_study(extra8);
940 pcre_free(re8);
941 }
942 #endif
943 #ifdef SUPPORT_PCRE16
944 if (re16) {
945 pcre16_free_study(extra16);
946 pcre_free(re16);
947 }
948 #endif
949
950 /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
951 printf(".");
952 fflush(stdout);
953 current++;
954 }
955
956 if (total == succesful) {
957 printf("\nAll JIT regression tests are successfully passed.\n");
958 return 0;
959 } else {
960 printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
961 return 1;
962 }
963 }
964
965
966 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5