/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1012 - (show annotations)
Sat Aug 25 15:34:13 2012 UTC (7 years, 1 month ago) by zherczeg
File MIME type: text/plain
File size: 53693 byte(s)
JIT support for extended grapheme cluster.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
/* Top bit of a 32-bit word. Not referenced in the visible part of this file.
   NOTE(review): presumably marks test cases that expose a known PCRE bug --
   confirm against its uses. */
#define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
85 static int regression_tests(void);
86
87 int main(void)
88 {
89 int jit = 0;
90 #ifdef SUPPORT_PCRE8
91 pcre_config(PCRE_CONFIG_JIT, &jit);
92 #else
93 pcre16_config(PCRE_CONFIG_JIT, &jit);
94 #endif
95 if (!jit) {
96 printf("JIT must be enabled to run pcre_jit_test\n");
97 return 1;
98 }
99 return regression_tests();
100 }
101
102 /* --------------------------------------------------------------------------------------- */
103
104 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106 #endif
107
/* Option sets used as the first member of the test cases. Each letter in a
   name stands for one option:
     C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
     A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
   The larger sets are built from the smaller ones; the resulting values are
   plain ORs of the listed options. */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(CMUA | PCRE_UCP)
115
/* The second member of each test case ORs a start offset with the F_* bits
   below (e.g. "1 | F_NOMATCH"). OFFSET_MASK covers the low 16 bits, which lie
   below every F_* bit.
   NOTE(review): the meaning of each F_* bit is decided by the test driver,
   which is outside this view -- confirm there before relying on the names. */
#define OFFSET_MASK	0xffff
#define F_NO8		(1 << 16)
#define F_NO16		(1 << 17)
#define F_NOMATCH	(1 << 18)
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
#define F_PROPERTY	(1 << 21)
123
/* One row of the regression test table. The table entries are written as
   positional initializers, so the member order must stay as it is. */
struct regression_test_case {
	/* PCRE option bits, e.g. MUA, CMA or individual PCRE_* options. */
	int flags;
	/* Start offset OR-ed with F_* bits, e.g. "1 | F_NOMATCH". */
	int start_offset;
	/* Regular expression for this case. */
	const char *pattern;
	/* Subject text. NOTE(review): presumably the string the pattern is
	   matched against -- confirm in regression_tests(). */
	const char *input;
};
130
131 static struct regression_test_case regression_test_cases[] = {
132 /* Constant strings. */
133 { MUA, 0, "AbC", "AbAbC" },
134 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136 { MA, 0, "[^a]", "aAbB" },
137 { CMA, 0, "[^m]", "mMnN" },
138 { MA, 0, "a[^b][^#]", "abacd" },
139 { CMA, 0, "A[^B][^E]", "abacd" },
140 { CMUA, 0, "[^x][^#]", "XxBll" },
141 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147 { MUA, 0, "[axd]", "sAXd" },
148 { CMUA, 0, "[axd]", "sAXd" },
149 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159 { PCRE_CASELESS, 0, "a1", "Aa1" },
160 { MA, 0, "\\Ca", "cda" },
161 { CMA, 0, "\\Ca", "CDA" },
162 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168
169 /* Assertions. */
170 { MUA, 0, "\\b[^A]", "A_B#" },
171 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173 { MAP, 0, "\\B", "_\xa1" },
174 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182 { MA, 1 | F_NOMATCH, "^", "\n" },
183 { 0, 0, "^ab", "ab" },
184 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190 { 0, 0, "ab$", "ab" },
191 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
192 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
193 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
197 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
198 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
199 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
200 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
201 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
202 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
203 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
204 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
205 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
206 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
207 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
208 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
212 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
213 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
214 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
218 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
219 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
220 { MA, 0, "\\Aa", "aaa" },
221 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
222 { MA, 1, "\\Ga", "aaa" },
223 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
224 { MA, 0, "a\\z", "aaa" },
225 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
226
227 /* Brackets. */
228 { MUA, 0, "(ab|bb|cd)", "bacde" },
229 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
230 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
231 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
232 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
233 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
234
235 /* Greedy and non-greedy ? operators. */
236 { MUA, 0, "(?:a)?a", "laab" },
237 { CMUA, 0, "(A)?A", "llaab" },
238 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
239 { MUA, 0, "(a)?a", "manm" },
240 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
241 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
242 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
243
244 /* Greedy and non-greedy + operators */
245 { MUA, 0, "(aa)+aa", "aaaaaaa" },
246 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
247 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
248 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
249 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
251 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
252
253 /* Greedy and non-greedy * operators */
254 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
255 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
256 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
257 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
258 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
259 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
260 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
261 { MA, 0, "((?:a|)*){0}a", "a" },
262
263 /* Combining ? + * operators */
264 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
265 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
267 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
268 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
269
270 /* Single character iterators. */
271 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
272 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
273 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
274 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
275 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
276 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
277 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
278 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
279 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
280 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
281 { MUA, 0, "(a?+[^b])+", "babaacacb" },
282 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
283 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
284 { CMUA, 0, "[c-f]+k", "DemmFke" },
285 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
286 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
287 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
288 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
289 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
290 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
291 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
292 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
293 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
294 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
295 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
296 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
297 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
298 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
299 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
300 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
302 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
303
304 /* Basic character sets. */
305 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
306 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
307 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
308 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
309 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
310 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
311
312 /* Unicode properties. */
313 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
314 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
315 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
316 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
317 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
318 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
320 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
321 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
323 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
324 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
325 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
326 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
327 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
328 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
329 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
330 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
331 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
332 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
333
334 /* Possible empty brackets. */
335 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
336 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
337 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
338 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
339 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
340 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
341 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
342 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
343 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
344 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
345
346 /* Start offset. */
347 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
348 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
350 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
351
352 /* Newline. */
353 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
355 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
356
357 /* Any character except newline or any newline. */
358 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
359 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
360 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
362 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
363 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
364 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
365 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
366 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
367 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
368 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
369 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
370 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
371 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
372 { MUA, 0, "\\R+", "ab\r\n\r" },
373 { MUA, 0, "\\R*", "ab\r\n\r" },
374 { MUA, 0, "\\R*", "\r\n\r" },
375 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
376 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
377 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
378 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
379 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
380 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
381 { MUA, 0, "\\R*\\R\\R", "\n\r" },
382 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
383 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
384
385 /* Atomic groups (no fallback from "next" direction). */
386 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
387 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
388 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
389 "bababcdedefgheijijklmlmnop" },
390 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
391 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
392 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
393 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
394 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
395 { MUA, 0, "(?>x|)*$", "aaa" },
396 { MUA, 0, "(?>(x)|)*$", "aaa" },
397 { MUA, 0, "(?>x|())*$", "aaa" },
398 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
399 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
400 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
401 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
402 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
403 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
404 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
405 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
406 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
407 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
408 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
409 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
410 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
411 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
412 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
413 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
415 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
416 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
417 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
418 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
419 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
420 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
421 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
422 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
423 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
424
425 /* Possessive quantifiers. */
426 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
427 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
428 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
429 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
430 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
431 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
432 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
433 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
434 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
435 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
436 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
437 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
438 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
439 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
440 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
441 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
442 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
443 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
444 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
445 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
446 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
447 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
448 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
449 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
450 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
451 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
452 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
453 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
454 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
455 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
456 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
457 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
458 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
459 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
461
462 /* Back references. */
463 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
464 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
465 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
466 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
467 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
468 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
470 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
471 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
472 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
473 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
474 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
475 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
477 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
478 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
479 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
480 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
481 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
482 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
483 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
484 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
485 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
486
487 /* Assertions. */
488 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
489 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
490 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
491 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
492 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
493 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
494 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
495 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
496 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
497 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
498 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
499 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
500 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
501 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
502 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
503 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
504 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
505 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
506 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
508 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
509 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
510 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
511 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
512
513 /* Not empty, ACCEPT, FAIL */
514 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
516 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
517 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
518 { MUA, 0, "a(*ACCEPT)b", "ab" },
519 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
520 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
521 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
522 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
523 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
524 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
525 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
526 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
527 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
528 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
529 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
530 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
531 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
532 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
533
534 /* Conditional blocks. */
535 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
536 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
537 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
538 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
539 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
541 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
543 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
545 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?=a)ab)", "a" },
548 { MUA, 0, "(?(?<!b)c)", "b" },
549 { MUA, 0, "(?(DEFINE)a(b))", "a" },
550 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
551 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
552 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
553 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
554 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
555 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
556 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
557 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
560 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
561 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
562 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
563 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
564 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
566 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
567 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
568 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
569
570 /* Set start of match. */
571 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
572 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
573 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
574 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
575 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
576
577 /* First line. */
578 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
579 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
580 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
581 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
582 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
583 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
584 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
585 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
586 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
587 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
588 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
589 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
590 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
591 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
592 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
594 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
595 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
596 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
597
598 /* Recurse. */
599 { MUA, 0, "(a)(?1)", "aa" },
600 { MUA, 0, "((a))(?1)", "aa" },
601 { MUA, 0, "(b|a)(?1)", "aa" },
602 { MUA, 0, "(b|(a))(?1)", "aa" },
603 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
604 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
605 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
606 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
607 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
608 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
609 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
610 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
611 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
612 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
613 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
614 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
615 { MUA, 0, "b|<(?R)*>", "<<b>" },
616 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
617 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
618 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
619 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
620 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
621 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
622 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
623 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
624
625 /* 16 bit specific tests. */
626 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
627 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
628 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
629 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
630 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
632 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
633 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
634 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
635 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
636 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
637 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
638 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
639 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
640 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
641 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
642 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
643 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
644 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
645 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
646 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
647 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
648 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
649 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
650 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
651 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
652 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
653 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
654 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
655 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
656 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
657
658 /* Partial matching. */
659 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
660 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
661 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
662 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
663 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
664 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
665 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
666 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
667
668 /* (*MARK) verb. */
669 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
670 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
671 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
672 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
673 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
674 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
675 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
676 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
677 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
678 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
679 { MUA, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
680 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
681 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
682 { MUA, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
683
684 /* (*COMMIT) verb. */
685 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
686 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
687 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
688 { MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" },
689 { MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" },
690
691 /* Deep recursion. */
692 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
693 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
694 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
695
696 /* Deep recursion: Stack limit reached. */
697 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
698 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
699 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
700 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
701 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
702
703 { 0, 0, NULL, NULL }
704 };
705
706 static const unsigned char *tables(int mode)
707 {
708 /* The purpose of this function to allow valgrind
709 for reporting invalid reads and writes. */
710 static unsigned char *tables_copy;
711 const char *errorptr;
712 int erroroffset;
713 unsigned char *default_tables;
714 #ifdef SUPPORT_PCRE8
715 pcre *regex;
716 char null_str[1] = { 0 };
717 #else
718 pcre16 *regex;
719 PCRE_UCHAR16 null_str[1] = { 0 };
720 #endif
721
722 if (mode) {
723 if (tables_copy)
724 free(tables_copy);
725 tables_copy = NULL;
726 return NULL;
727 }
728
729 if (tables_copy)
730 return tables_copy;
731
732 default_tables = NULL;
733 #ifdef SUPPORT_PCRE8
734 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
735 if (regex) {
736 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
737 pcre_free(regex);
738 }
739 #else
740 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
741 if (regex) {
742 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
743 pcre16_free(regex);
744 }
745 #endif
746 /* Shouldn't ever happen. */
747 if (!default_tables)
748 return NULL;
749
750 /* Unfortunately this value cannot get from pcre_fullinfo.
751 Since this is a test program, this is acceptable at the moment. */
752 tables_copy = (unsigned char *)malloc(1088);
753 if (!tables_copy)
754 return NULL;
755
756 memcpy(tables_copy, default_tables, 1088);
757 return tables_copy;
758 }
759
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the callback argument is the
   stack itself (see setstack8), so it is simply handed back. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *stack = (pcre_jit_stack *)arg;

	return stack;
}
#endif
766
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the callback argument is the
   stack itself (see setstack16), so it is simply handed back. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *stack = (pcre16_jit_stack *)arg;

	return stack;
}
#endif
773
#ifdef SUPPORT_PCRE8
/* Manages the JIT stack shared by the 8 bit tests.

   extra != NULL: create the stack if it does not exist yet and assign it to
                  extra through callback8.
   extra == NULL: free the stack (if any); the next non-NULL call allocates
                  a new one. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *stack;

	if (extra != NULL) {
		if (stack == NULL)
			stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		/* The allocation result is passed on unchecked.
		   NOTE(review): presumably it is NULL when the allocation
		   fails - confirm how pcre_assign_jit_stack treats that. */
		pcre_assign_jit_stack(extra, callback8, stack);
		return;
	}

	/* Release request. */
	if (stack != NULL)
		pcre_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE8 */
792
#ifdef SUPPORT_PCRE16
/* Manages the JIT stack shared by the 16 bit tests.

   extra != NULL: create the stack if it does not exist yet and assign it to
                  extra through callback16.
   extra == NULL: free the stack (if any); the next non-NULL call allocates
                  a new one. */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *stack;

	if (extra != NULL) {
		if (stack == NULL)
			stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		/* The allocation result is passed on unchecked.
		   NOTE(review): presumably it is NULL when the allocation
		   fails - confirm how pcre16_assign_jit_stack treats that. */
		pcre16_assign_jit_stack(extra, callback16, stack);
		return;
	}

	/* Release request. */
	if (stack != NULL)
		pcre16_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE16 */
811
812 #ifdef SUPPORT_PCRE16
813
814 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
815 {
816 unsigned char *iptr = (unsigned char*)input;
817 unsigned short *optr = (unsigned short *)output;
818 unsigned int c;
819
820 if (max_length == 0)
821 return 0;
822
823 while (*iptr && max_length > 1) {
824 c = 0;
825 if (offsetmap)
826 *offsetmap++ = (int)(iptr - (unsigned char*)input);
827
828 if (!(*iptr & 0x80))
829 c = *iptr++;
830 else if (!(*iptr & 0x20)) {
831 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
832 iptr += 2;
833 } else if (!(*iptr & 0x10)) {
834 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
835 iptr += 3;
836 } else if (!(*iptr & 0x08)) {
837 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
838 iptr += 4;
839 }
840
841 if (c < 65536) {
842 *optr++ = c;
843 max_length--;
844 } else if (max_length <= 2) {
845 *optr = '\0';
846 return (int)(optr - (unsigned short *)output);
847 } else {
848 c -= 0x10000;
849 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
850 *optr++ = 0xdc00 | (c & 0x3ff);
851 max_length -= 2;
852 if (offsetmap)
853 offsetmap++;
854 }
855 }
856 if (offsetmap)
857 *offsetmap = (int)(iptr - (unsigned char*)input);
858 *optr = '\0';
859 return (int)(optr - (unsigned short *)output);
860 }
861
862 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
863 {
864 unsigned char *iptr = (unsigned char*)input;
865 unsigned short *optr = (unsigned short *)output;
866
867 if (max_length == 0)
868 return 0;
869
870 while (*iptr && max_length > 1) {
871 *optr++ = *iptr++;
872 max_length--;
873 }
874 *optr = '\0';
875 return (int)(optr - (unsigned short *)output);
876 }
877
/* Number of elements in each of the two 16 bit conversion buffers below;
   regression_tests passes it as max_length to convert_utf8_to_utf16 and
   copy_char8_to_char16. */
878 #define REGTEST_MAX_LENGTH 4096
/* Holds the 16 bit form of the current pattern while it is compiled and is
   then reused for the 16 bit form of the current input. */
879 static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
/* Offset map filled by convert_utf8_to_utf16 for the current input;
   regression_tests uses it to translate 16 bit ovector offsets back to byte
   offsets before comparing them with the 8 bit results. */
880 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
881
882 #endif /* SUPPORT_PCRE16 */
883
/* Returns 1 if every byte of the zero terminated string is below 128
   (an empty string qualifies), 0 otherwise. */
static int check_ascii(const char *input)
{
	const unsigned char *p;

	for (p = (const unsigned char *)input; *p != 0; p++) {
		if (*p & 0x80)
			return 0;
	}
	return 1;
}
894
895 static int regression_tests(void)
896 {
897 struct regression_test_case *current = regression_test_cases;
898 const char *error;
899 char *cpu_info;
900 int i, err_offs;
901 int is_successful, is_ascii_pattern, is_ascii_input;
902 int total = 0;
903 int successful = 0;
904 int successful_row = 0;
905 int counter = 0;
906 int study_mode;
907 #ifdef SUPPORT_PCRE8
908 pcre *re8;
909 pcre_extra *extra8;
910 pcre_extra dummy_extra8;
911 int ovector8_1[32];
912 int ovector8_2[32];
913 int return_value8_1, return_value8_2;
914 unsigned char *mark8_1, *mark8_2;
915 int utf8 = 0, ucp8 = 0;
916 int disabled_flags8 = 0;
917 #endif
918 #ifdef SUPPORT_PCRE16
919 pcre16 *re16;
920 pcre16_extra *extra16;
921 pcre16_extra dummy_extra16;
922 int ovector16_1[32];
923 int ovector16_2[32];
924 int return_value16_1, return_value16_2;
925 PCRE_UCHAR16 *mark16_1, *mark16_2;
926 int utf16 = 0, ucp16 = 0;
927 int disabled_flags16 = 0;
928 int length16;
929 #endif
930
931 /* This test compares the behaviour of interpreter and JIT. Although disabling
932 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
933 still considered successful from pcre_jit_test point of view. */
934
935 #ifdef SUPPORT_PCRE8
936 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
937 #else
938 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
939 #endif
940
941 printf("Running JIT regression tests\n");
942 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
943
944 #ifdef SUPPORT_PCRE8
945 pcre_config(PCRE_CONFIG_UTF8, &utf8);
946 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
947 if (!utf8)
948 disabled_flags8 |= PCRE_UTF8;
949 if (!ucp8)
950 disabled_flags8 |= PCRE_UCP;
951 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
952 #endif
953 #ifdef SUPPORT_PCRE16
954 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
955 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
956 if (!utf16)
957 disabled_flags16 |= PCRE_UTF8;
958 if (!ucp16)
959 disabled_flags16 |= PCRE_UCP;
960 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
961 #endif
962
963 while (current->pattern) {
964 /* printf("\nPattern: %s :\n", current->pattern); */
965 total++;
966 if (current->start_offset & F_PROPERTY) {
967 is_ascii_pattern = 0;
968 is_ascii_input = 0;
969 } else {
970 is_ascii_pattern = check_ascii(current->pattern);
971 is_ascii_input = check_ascii(current->input);
972 }
973
974 if (current->flags & PCRE_PARTIAL_SOFT)
975 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
976 else if (current->flags & PCRE_PARTIAL_HARD)
977 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
978 else
979 study_mode = PCRE_STUDY_JIT_COMPILE;
980 error = NULL;
981 #ifdef SUPPORT_PCRE8
982 re8 = NULL;
983 if (!(current->start_offset & F_NO8))
984 re8 = pcre_compile(current->pattern,
985 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags8),
986 &error, &err_offs, tables(0));
987
988 extra8 = NULL;
989 if (re8) {
990 error = NULL;
991 extra8 = pcre_study(re8, study_mode, &error);
992 if (!extra8) {
993 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
994 pcre_free(re8);
995 re8 = NULL;
996 }
997 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
998 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
999 pcre_free_study(extra8);
1000 pcre_free(re8);
1001 re8 = NULL;
1002 }
1003 extra8->flags |= PCRE_EXTRA_MARK;
1004 } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1005 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
1006 #endif
1007 #ifdef SUPPORT_PCRE16
1008 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1009 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
1010 else
1011 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
1012
1013 re16 = NULL;
1014 if (!(current->start_offset & F_NO16))
1015 re16 = pcre16_compile(regtest_buf,
1016 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags16),
1017 &error, &err_offs, tables(0));
1018
1019 extra16 = NULL;
1020 if (re16) {
1021 error = NULL;
1022 extra16 = pcre16_study(re16, study_mode, &error);
1023 if (!extra16) {
1024 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1025 pcre16_free(re16);
1026 re16 = NULL;
1027 }
1028 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1029 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1030 pcre16_free_study(extra16);
1031 pcre16_free(re16);
1032 re16 = NULL;
1033 }
1034 extra16->flags |= PCRE_EXTRA_MARK;
1035 } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1036 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
1037 #endif
1038
1039 counter++;
1040 if ((counter & 0x3) != 0) {
1041 #ifdef SUPPORT_PCRE8
1042 setstack8(NULL);
1043 #endif
1044 #ifdef SUPPORT_PCRE16
1045 setstack16(NULL);
1046 #endif
1047 }
1048
1049 #ifdef SUPPORT_PCRE8
1050 return_value8_1 = -1000;
1051 return_value8_2 = -1000;
1052 for (i = 0; i < 32; ++i)
1053 ovector8_1[i] = -2;
1054 for (i = 0; i < 32; ++i)
1055 ovector8_2[i] = -2;
1056 if (re8) {
1057 mark8_1 = NULL;
1058 mark8_2 = NULL;
1059 setstack8(extra8);
1060 extra8->mark = &mark8_1;
1061 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1062 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1063 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1064 dummy_extra8.flags = PCRE_EXTRA_MARK;
1065 dummy_extra8.mark = &mark8_2;
1066 return_value8_2 = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1067 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1068 }
1069 #endif
1070
1071 #ifdef SUPPORT_PCRE16
1072 return_value16_1 = -1000;
1073 return_value16_2 = -1000;
1074 for (i = 0; i < 32; ++i)
1075 ovector16_1[i] = -2;
1076 for (i = 0; i < 32; ++i)
1077 ovector16_2[i] = -2;
1078 if (re16) {
1079 mark16_1 = NULL;
1080 mark16_2 = NULL;
1081 setstack16(extra16);
1082 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1083 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1084 else
1085 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1086 extra16->mark = &mark16_1;
1087 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1088 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1089 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1090 dummy_extra16.flags = PCRE_EXTRA_MARK;
1091 dummy_extra16.mark = &mark16_2;
1092 return_value16_2 = pcre16_exec(re16, &dummy_extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1093 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1094 }
1095 #endif
1096
1097 /* printf("[%d-%d|%d-%d|%d-%d]%s", return_value8_1, return_value16_1, ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1098
1099 /* If F_DIFF is set, just run the test, but do not compare the results.
1100 Segfaults can still be captured. */
1101
1102 is_successful = 1;
1103 if (!(current->start_offset & F_DIFF)) {
1104 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1105 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1106 /* All results must be the same. */
1107 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1108 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1109 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1110 total, current->pattern, current->input);
1111 is_successful = 0;
1112 } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
1113 if (return_value8_1 == PCRE_ERROR_PARTIAL) {
1114 return_value8_1 = 2;
1115 return_value16_1 = 2;
1116 } else {
1117 return_value8_1 *= 2;
1118 return_value16_1 *= 2;
1119 }
1120
1121 /* Transform back the results. */
1122 if (current->flags & PCRE_UTF8) {
1123 for (i = 0; i < return_value8_1; ++i) {
1124 if (ovector16_1[i] >= 0)
1125 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1126 if (ovector16_2[i] >= 0)
1127 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1128 }
1129 }
1130
1131 for (i = 0; i < return_value8_1; ++i)
1132 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1133 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1134 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1135 total, current->pattern, current->input);
1136 is_successful = 0;
1137 }
1138 }
1139 } else {
1140 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1141 /* Only the 8 bit and 16 bit results must be equal. */
1142 #ifdef SUPPORT_PCRE8
1143 if (return_value8_1 != return_value8_2) {
1144 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1145 return_value8_1, return_value8_2, total, current->pattern, current->input);
1146 is_successful = 0;
1147 } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
1148 if (return_value8_1 == PCRE_ERROR_PARTIAL)
1149 return_value8_1 = 2;
1150 else
1151 return_value8_1 *= 2;
1152
1153 for (i = 0; i < return_value8_1; ++i)
1154 if (ovector8_1[i] != ovector8_2[i]) {
1155 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1156 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1157 is_successful = 0;
1158 }
1159 }
1160 #endif
1161
1162 #ifdef SUPPORT_PCRE16
1163 if (return_value16_1 != return_value16_2) {
1164 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1165 return_value16_1, return_value16_2, total, current->pattern, current->input);
1166 is_successful = 0;
1167 } else if (return_value16_1 >= 0 || return_value16_1 == PCRE_ERROR_PARTIAL) {
1168 if (return_value16_1 == PCRE_ERROR_PARTIAL)
1169 return_value16_1 = 2;
1170 else
1171 return_value16_1 *= 2;
1172
1173 for (i = 0; i < return_value16_1; ++i)
1174 if (ovector16_1[i] != ovector16_2[i]) {
1175 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1176 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1177 is_successful = 0;
1178 }
1179 }
1180 #endif
1181
1182 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1183 }
1184 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1185 }
1186
1187 if (is_successful) {
1188 #ifdef SUPPORT_PCRE8
1189 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1190 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1191 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1192 total, current->pattern, current->input);
1193 is_successful = 0;
1194 }
1195
1196 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1197 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1198 total, current->pattern, current->input);
1199 is_successful = 0;
1200 }
1201 }
1202 #endif
1203 #ifdef SUPPORT_PCRE16
1204 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1205 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1206 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1207 total, current->pattern, current->input);
1208 is_successful = 0;
1209 }
1210
1211 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1212 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1213 total, current->pattern, current->input);
1214 is_successful = 0;
1215 }
1216 }
1217 #endif
1218 }
1219
1220 if (is_successful) {
1221 #ifdef SUPPORT_PCRE8
1222 if (mark8_1 != mark8_2) {
1223 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1224 total, current->pattern, current->input);
1225 is_successful = 0;
1226 }
1227 #endif
1228 #ifdef SUPPORT_PCRE16
1229 if (mark16_1 != mark16_2) {
1230 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1231 total, current->pattern, current->input);
1232 is_successful = 0;
1233 }
1234 #endif
1235 }
1236
1237 #ifdef SUPPORT_PCRE8
1238 if (re8) {
1239 pcre_free_study(extra8);
1240 pcre_free(re8);
1241 }
1242 #endif
1243 #ifdef SUPPORT_PCRE16
1244 if (re16) {
1245 pcre16_free_study(extra16);
1246 pcre16_free(re16);
1247 }
1248 #endif
1249
1250 if (is_successful) {
1251 successful++;
1252 successful_row++;
1253 printf(".");
1254 if (successful_row >= 60) {
1255 successful_row = 0;
1256 printf("\n");
1257 }
1258 } else
1259 successful_row = 0;
1260
1261 fflush(stdout);
1262 current++;
1263 }
1264 tables(1);
1265 #ifdef SUPPORT_PCRE8
1266 setstack8(NULL);
1267 #endif
1268 #ifdef SUPPORT_PCRE16
1269 setstack16(NULL);
1270 #endif
1271
1272 if (total == successful) {
1273 printf("\nAll JIT regression tests are successfully passed.\n");
1274 return 0;
1275 } else {
1276 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1277 return 1;
1278 }
1279 }
1280
1281 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5