/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 977 - (show annotations)
Sun Jun 17 06:20:52 2012 UTC (7 years, 4 months ago) by zherczeg
File MIME type: text/plain
File size: 53544 byte(s)
Optimizing single character iterators in JIT
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
/* Marker value occupying only the top bit (bit 31) of a 32-bit word.
   NOTE(review): its users are not visible in this part of the file;
   presumably it tags test cases affected by a known PCRE bug - confirm. */
#define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
85 static int regression_tests(void);
86
87 int main(void)
88 {
89 int jit = 0;
90 #ifdef SUPPORT_PCRE8
91 pcre_config(PCRE_CONFIG_JIT, &jit);
92 #else
93 pcre16_config(PCRE_CONFIG_JIT, &jit);
94 #endif
95 if (!jit) {
96 printf("JIT must be enabled to run pcre_jit_test\n");
97 return 1;
98 }
99 return regression_tests();
100 }
101
102 /* --------------------------------------------------------------------------------------- */
103
/* Build-time guard: compilation is stopped unless at least one of the
   8 bit (SUPPORT_PCRE8) or 16 bit (SUPPORT_PCRE16) library variants
   has been selected. */
104 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106 #endif
107
/* Shorthand PCRE option sets used by the test table. Each letter in a name
   stands for one option:
     C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
     A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
#define MUA   (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define MUAP  (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMUA  (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define MA    (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP   (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMA   (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)

/* Layout of the start_offset field of a test case: the table writes it as
   "<offset> | F_...", so the low 16 bits (OFFSET_MASK) hold the start offset
   and the bits above them hold the per-test flags below.
   NOTE(review): the code that interprets these flags is not visible in this
   part of the file; the meanings given are inferred from the names and from
   the table entries that use them - confirm against the test runner. */
#define OFFSET_MASK 0x00ffff
#define F_NO8       0x010000  /* presumably: not run with the 8 bit library */
#define F_NO16      0x020000  /* presumably: not run with the 16 bit library */
#define F_NOMATCH   0x040000  /* set on entries whose input should not match */
#define F_DIFF      0x080000  /* presumably: results are allowed to differ */
#define F_FORCECONV 0x100000  /* set on the "16 bit specific" entries */
#define F_PROPERTY  0x200000  /* set on entries using \p, \P or \X */
123
/* One entry of the regression test table. Entries are written with
   positional initializers, so the member order must not change. */
struct regression_test_case {
	/* PCRE_* option bits for this case (often one of the MUA/CMA/...
	   shorthand sets, optionally or'ed with further PCRE_* options). */
	int flags;
	/* Start offset in the bits covered by OFFSET_MASK, or'ed with the
	   F_* per-test flags (for example "1 | F_NOMATCH"). */
	int start_offset;
	/* Regular expression under test. */
	const char *pattern;
	/* Subject string the pattern is run against. */
	const char *input;
};
130
131 static struct regression_test_case regression_test_cases[] = {
132 /* Constant strings. */
133 { MUA, 0, "AbC", "AbAbC" },
134 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136 { MA, 0, "[^a]", "aAbB" },
137 { CMA, 0, "[^m]", "mMnN" },
138 { MA, 0, "a[^b][^#]", "abacd" },
139 { CMA, 0, "A[^B][^E]", "abacd" },
140 { CMUA, 0, "[^x][^#]", "XxBll" },
141 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147 { MUA, 0, "[axd]", "sAXd" },
148 { CMUA, 0, "[axd]", "sAXd" },
149 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159 { PCRE_CASELESS, 0, "a1", "Aa1" },
160 { MA, 0, "\\Ca", "cda" },
161 { CMA, 0, "\\Ca", "CDA" },
162 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168
169 /* Assertions. */
170 { MUA, 0, "\\b[^A]", "A_B#" },
171 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173 { MAP, 0, "\\B", "_\xa1" },
174 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182 { MA, 1 | F_NOMATCH, "^", "\n" },
183 { 0, 0, "^ab", "ab" },
184 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190 { 0, 0, "ab$", "ab" },
191 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
192 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
193 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
197 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
198 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
199 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
200 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
201 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
202 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
203 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
204 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
205 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
206 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
207 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
208 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
212 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
213 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
214 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
218 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
219 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
220 { MA, 0, "\\Aa", "aaa" },
221 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
222 { MA, 1, "\\Ga", "aaa" },
223 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
224 { MA, 0, "a\\z", "aaa" },
225 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
226
227 /* Brackets. */
228 { MUA, 0, "(ab|bb|cd)", "bacde" },
229 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
230 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
231 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
232 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
233 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
234
235 /* Greedy and non-greedy ? operators. */
236 { MUA, 0, "(?:a)?a", "laab" },
237 { CMUA, 0, "(A)?A", "llaab" },
238 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
239 { MUA, 0, "(a)?a", "manm" },
240 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
241 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
242 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
243
244 /* Greedy and non-greedy + operators */
245 { MUA, 0, "(aa)+aa", "aaaaaaa" },
246 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
247 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
248 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
249 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
251 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
252
253 /* Greedy and non-greedy * operators */
254 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
255 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
256 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
257 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
258 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
259 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
260 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
261 { MA, 0, "((?:a|)*){0}a", "a" },
262
263 /* Combining ? + * operators */
264 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
265 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
267 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
268 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
269
270 /* Single character iterators. */
271 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
272 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
273 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
274 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
275 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
276 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
277 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
278 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
279 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
280 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
281 { MUA, 0, "(a?+[^b])+", "babaacacb" },
282 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
283 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
284 { CMUA, 0, "[c-f]+k", "DemmFke" },
285 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
286 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
287 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
288 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
289 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
290 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
291 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
292 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
293 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
294 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
295 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
296 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
297 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
298 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
299 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
300 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
302 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
303
304 /* Basic character sets. */
305 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
306 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
307 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
308 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
309 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
310 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
311
312 /* Unicode properties. */
313 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
314 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
315 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
316 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
317 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
318 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
320 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
321 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
323 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
324 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
325 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
326 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
327 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
328 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
329 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
330 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
331 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
332 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
333
334 /* Possible empty brackets. */
335 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
336 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
337 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
338 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
339 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
340 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
341 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
342 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
343 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
344 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
345
346 /* Start offset. */
347 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
348 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
350 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
351
352 /* Newline. */
353 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
355 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
356
357 /* Any character except newline or any newline. */
358 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
359 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
360 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
362 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
363 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
364 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
365 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
366 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
367 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
368 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
369 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
370 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
371 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
372 { MUA, 0, "\\R+", "ab\r\n\r" },
373 { MUA, 0, "\\R*", "ab\r\n\r" },
374 { MUA, 0, "\\R*", "\r\n\r" },
375 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
376 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
377 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
378 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
379 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
380 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
381 { MUA, 0, "\\R*\\R\\R", "\n\r" },
382 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
383 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
384
385 /* Atomic groups (no fallback from "next" direction). */
386 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
387 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
388 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
389 "bababcdedefgheijijklmlmnop" },
390 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
391 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
392 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
393 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
394 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
395 { MUA, 0, "(?>x|)*$", "aaa" },
396 { MUA, 0, "(?>(x)|)*$", "aaa" },
397 { MUA, 0, "(?>x|())*$", "aaa" },
398 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
399 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
400 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
401 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
402 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
403 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
404 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
405 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
406 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
407 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
408 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
409 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
410 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
411 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
412 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
413 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
415 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
416 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
417 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
418 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
419 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
420 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
421 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
422 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
423 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
424
425 /* Possessive quantifiers. */
426 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
427 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
428 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
429 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
430 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
431 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
432 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
433 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
434 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
435 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
436 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
437 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
438 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
439 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
440 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
441 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
442 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
443 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
444 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
445 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
446 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
447 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
448 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
449 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
450 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
451 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
452 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
453 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
454 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
455 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
456 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
457 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
458 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
459 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
461
462 /* Back references. */
463 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
464 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
465 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
466 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
467 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
468 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
470 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
471 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
472 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
473 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
474 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
475 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
477 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
478 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
479 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
480 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
481 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
482 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
483 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
484 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
485 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
486
487 /* Assertions. */
488 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
489 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
490 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
491 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
492 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
493 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
494 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
495 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
496 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
497 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
498 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
499 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
500 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
501 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
502 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
503 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
504 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
505 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
506 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
508 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
509 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
510 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
511 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
512
513 /* Not empty, ACCEPT, FAIL */
514 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
516 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
517 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
518 { MUA, 0, "a(*ACCEPT)b", "ab" },
519 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
520 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
521 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
522 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
523 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
524 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
525 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
526 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
527 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
528 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
529 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
530 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
531 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
532 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
533
534 /* Conditional blocks. */
535 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
536 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
537 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
538 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
539 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
541 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
543 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
545 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?=a)ab)", "a" },
548 { MUA, 0, "(?(?<!b)c)", "b" },
549 { MUA, 0, "(?(DEFINE)a(b))", "a" },
550 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
551 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
552 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
553 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
554 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
555 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
556 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
557 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
560 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
561 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
562 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
563 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
564 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
566 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
567 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
568 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
569
570 /* Set start of match. */
571 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
572 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
573 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
574 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
575 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
576
577 /* First line. */
578 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
579 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
580 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
581 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
582 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
583 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
584 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
585 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
586 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
587 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
589 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
590 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
591 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
592 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
594 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
595
596 /* Recurse. */
597 { MUA, 0, "(a)(?1)", "aa" },
598 { MUA, 0, "((a))(?1)", "aa" },
599 { MUA, 0, "(b|a)(?1)", "aa" },
600 { MUA, 0, "(b|(a))(?1)", "aa" },
601 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
602 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
603 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
604 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
605 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
606 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
607 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
608 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
609 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
610 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
611 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
612 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
613 { MUA, 0, "b|<(?R)*>", "<<b>" },
614 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
615 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
616 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
617 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
618 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
619 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
620 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
621 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
622
623 /* 16 bit specific tests. */
624 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
625 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
626 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
627 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
628 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
629 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
630 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
632 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
634 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
635 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
636 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
637 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
638 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
639 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
640 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
642 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
643 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
645 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
646 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
647 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
648 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
649 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
650 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
651 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
652 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
653 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
654 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
655
656 /* Partial matching. */
657 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
658 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
659 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
660 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
661 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
662 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
663 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
664 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
665
666 /* (*MARK) verb. */
667 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
668 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
669 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
670 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
671 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
672 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
673 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
674 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
675 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
676 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
677 { MUA, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
678 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
679 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
680 { MUA, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
681
682 /* (*COMMIT) verb. */
683 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
684 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
685 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
686 { MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" },
687 { MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" },
688
689 /* Deep recursion. */
690 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
691 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
692 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
693
694 /* Deep recursion: Stack limit reached. */
695 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
696 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
697 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
698 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
699 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
700
701 { 0, 0, NULL, NULL }
702 };
703
704 static const unsigned char *tables(int mode)
705 {
706 /* The purpose of this function to allow valgrind
707 for reporting invalid reads and writes. */
708 static unsigned char *tables_copy;
709 const char *errorptr;
710 int erroroffset;
711 unsigned char *default_tables;
712 #ifdef SUPPORT_PCRE8
713 pcre *regex;
714 char null_str[1] = { 0 };
715 #else
716 pcre16 *regex;
717 PCRE_UCHAR16 null_str[1] = { 0 };
718 #endif
719
720 if (mode) {
721 if (tables_copy)
722 free(tables_copy);
723 tables_copy = NULL;
724 return NULL;
725 }
726
727 if (tables_copy)
728 return tables_copy;
729
730 default_tables = NULL;
731 #ifdef SUPPORT_PCRE8
732 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
733 if (regex) {
734 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
735 pcre_free(regex);
736 }
737 #else
738 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
739 if (regex) {
740 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
741 pcre16_free(regex);
742 }
743 #endif
744 /* Shouldn't ever happen. */
745 if (!default_tables)
746 return NULL;
747
748 /* Unfortunately this value cannot get from pcre_fullinfo.
749 Since this is a test program, this is acceptable at the moment. */
750 tables_copy = (unsigned char *)malloc(1088);
751 if (!tables_copy)
752 return NULL;
753
754 memcpy(tables_copy, default_tables, 1088);
755 return tables_copy;
756 }
757
758 #ifdef SUPPORT_PCRE8
759 static pcre_jit_stack* callback8(void *arg)
760 {
761 return (pcre_jit_stack *)arg;
762 }
763 #endif
764
765 #ifdef SUPPORT_PCRE16
766 static pcre16_jit_stack* callback16(void *arg)
767 {
768 return (pcre16_jit_stack *)arg;
769 }
770 #endif
771
772 #ifdef SUPPORT_PCRE8
773 static void setstack8(pcre_extra *extra)
774 {
775 static pcre_jit_stack *stack;
776
777 if (!extra) {
778 if (stack)
779 pcre_jit_stack_free(stack);
780 stack = NULL;
781 return;
782 }
783
784 if (!stack)
785 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
786 /* Extra can be NULL. */
787 pcre_assign_jit_stack(extra, callback8, stack);
788 }
789 #endif /* SUPPORT_PCRE8 */
790
791 #ifdef SUPPORT_PCRE16
792 static void setstack16(pcre16_extra *extra)
793 {
794 static pcre16_jit_stack *stack;
795
796 if (!extra) {
797 if (stack)
798 pcre16_jit_stack_free(stack);
799 stack = NULL;
800 return;
801 }
802
803 if (!stack)
804 stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
805 /* Extra can be NULL. */
806 pcre16_assign_jit_stack(extra, callback16, stack);
807 }
808 #endif /* SUPPORT_PCRE16 */
809
810 #ifdef SUPPORT_PCRE16
811
/* Converts the NUL terminated UTF-8 string 'input' to UTF-16.

   input      - NUL terminated source bytes. Only the structure of the
                encoding is decoded; the resulting code points are not
                validated (the tests deliberately feed UTF-8 encoded
                surrogates through this function).
   output     - receives at most max_length code units, including the
                terminating zero.
   offsetmap  - optional (may be NULL). For every character, the entry whose
                index is the position of the character's first output unit
                receives the byte offset of the character in 'input'. The
                entry for the second unit of a surrogate pair is skipped
                (left unwritten). One extra entry after the last character
                receives the offset where the conversion stopped.
   max_length - capacity of 'output' in code units.

   Returns the number of code units written, not counting the terminator.
   When max_length is 0 nothing is written at all.

   The conversion stops early, as if the input ended there, when it meets a
   multi-byte sequence cut short by the terminating NUL or a lead byte in
   the range 0xf8..0xff. This keeps the decoder from reading past the end
   of 'input' and from stalling on a byte it cannot decode. */
static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
{
	const unsigned char *iptr = (const unsigned char *)input;
	unsigned short *optr = (unsigned short *)output;
	unsigned int c;
	int seq_length, i;

	if (max_length == 0)
		return 0;

	/* One output slot is always reserved for the terminator. */
	while (*iptr && max_length > 1) {
		/* The lead byte determines the length of the sequence. */
		if (!(*iptr & 0x80))
			seq_length = 1;
		else if (!(*iptr & 0x20))
			seq_length = 2;
		else if (!(*iptr & 0x10))
			seq_length = 3;
		else if (!(*iptr & 0x08))
			seq_length = 4;
		else
			break; /* 0xf8..0xff: cannot be decoded. */

		/* Never step over the terminating NUL of the input. */
		for (i = 1; i < seq_length; i++)
			if (!iptr[i])
				break;
		if (i < seq_length)
			break;

		if (offsetmap)
			*offsetmap++ = (int)(iptr - (const unsigned char *)input);

		switch (seq_length) {
		case 1:
			c = iptr[0];
			break;
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		default:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		}
		iptr += seq_length;

		if (c < 65536) {
			*optr++ = c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room left for a surrogate pair plus the terminator. */
			*optr = '\0';
			return (int)(optr - (unsigned short *)output);
		} else {
			/* Encode as a high / low surrogate pair. */
			c -= 0x10000;
			*optr++ = 0xd800 | ((c >> 10) & 0x3ff);
			*optr++ = 0xdc00 | (c & 0x3ff);
			max_length -= 2;
			/* Skip the map entry that belongs to the low surrogate. */
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - (const unsigned char *)input);
	*optr = '\0';
	return (int)(optr - (unsigned short *)output);
}
859
/* Widens the NUL terminated byte string 'input' into 16 bit units, one unit
   per byte, without any decoding. At most max_length units are stored in
   'output', including the terminating zero. Returns the number of units
   copied (terminator excluded); nothing is written when max_length is 0. */
static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
{
	const unsigned char *src = (const unsigned char *)input;
	unsigned short *dst = (unsigned short *)output;
	int copied = 0;

	if (max_length == 0)
		return 0;

	/* Keep one slot free for the terminator. */
	for (; src[copied] != 0 && max_length - copied > 1; copied++)
		dst[copied] = src[copied];

	dst[copied] = '\0';
	return copied;
}
875
876 #define REGTEST_MAX_LENGTH 4096
877 static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
878 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
879
880 #endif /* SUPPORT_PCRE16 */
881
/* Returns 1 when every byte of the NUL terminated string 'input' is
   in the 0..127 range (an empty string qualifies), 0 otherwise. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != '\0'; cursor++) {
		if (*cursor >= 0x80)
			return 0;
	}
	return 1;
}
892
893 static int regression_tests(void)
894 {
895 struct regression_test_case *current = regression_test_cases;
896 const char *error;
897 char *cpu_info;
898 int i, err_offs;
899 int is_successful, is_ascii_pattern, is_ascii_input;
900 int total = 0;
901 int successful = 0;
902 int successful_row = 0;
903 int counter = 0;
904 int study_mode;
905 #ifdef SUPPORT_PCRE8
906 pcre *re8;
907 pcre_extra *extra8;
908 pcre_extra dummy_extra8;
909 int ovector8_1[32];
910 int ovector8_2[32];
911 int return_value8_1, return_value8_2;
912 unsigned char *mark8_1, *mark8_2;
913 int utf8 = 0, ucp8 = 0;
914 int disabled_flags8 = 0;
915 #endif
916 #ifdef SUPPORT_PCRE16
917 pcre16 *re16;
918 pcre16_extra *extra16;
919 pcre16_extra dummy_extra16;
920 int ovector16_1[32];
921 int ovector16_2[32];
922 int return_value16_1, return_value16_2;
923 PCRE_UCHAR16 *mark16_1, *mark16_2;
924 int utf16 = 0, ucp16 = 0;
925 int disabled_flags16 = 0;
926 int length16;
927 #endif
928
929 /* This test compares the behaviour of interpreter and JIT. Although disabling
930 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
931 still considered successful from pcre_jit_test point of view. */
932
933 #ifdef SUPPORT_PCRE8
934 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
935 #else
936 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
937 #endif
938
939 printf("Running JIT regression tests\n");
940 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
941
942 #ifdef SUPPORT_PCRE8
943 pcre_config(PCRE_CONFIG_UTF8, &utf8);
944 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
945 if (!utf8)
946 disabled_flags8 |= PCRE_UTF8;
947 if (!ucp8)
948 disabled_flags8 |= PCRE_UCP;
949 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
950 #endif
951 #ifdef SUPPORT_PCRE16
952 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
953 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
954 if (!utf16)
955 disabled_flags16 |= PCRE_UTF8;
956 if (!ucp16)
957 disabled_flags16 |= PCRE_UCP;
958 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
959 #endif
960
961 while (current->pattern) {
962 /* printf("\nPattern: %s :\n", current->pattern); */
963 total++;
964 if (current->start_offset & F_PROPERTY) {
965 is_ascii_pattern = 0;
966 is_ascii_input = 0;
967 } else {
968 is_ascii_pattern = check_ascii(current->pattern);
969 is_ascii_input = check_ascii(current->input);
970 }
971
972 if (current->flags & PCRE_PARTIAL_SOFT)
973 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
974 else if (current->flags & PCRE_PARTIAL_HARD)
975 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
976 else
977 study_mode = PCRE_STUDY_JIT_COMPILE;
978 error = NULL;
979 #ifdef SUPPORT_PCRE8
980 re8 = NULL;
981 if (!(current->start_offset & F_NO8))
982 re8 = pcre_compile(current->pattern,
983 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags8),
984 &error, &err_offs, tables(0));
985
986 extra8 = NULL;
987 if (re8) {
988 error = NULL;
989 extra8 = pcre_study(re8, study_mode, &error);
990 if (!extra8) {
991 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
992 pcre_free(re8);
993 re8 = NULL;
994 }
995 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
996 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
997 pcre_free_study(extra8);
998 pcre_free(re8);
999 re8 = NULL;
1000 }
1001 extra8->flags |= PCRE_EXTRA_MARK;
1002 } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1003 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
1004 #endif
1005 #ifdef SUPPORT_PCRE16
1006 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1007 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
1008 else
1009 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
1010
1011 re16 = NULL;
1012 if (!(current->start_offset & F_NO16))
1013 re16 = pcre16_compile(regtest_buf,
1014 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags16),
1015 &error, &err_offs, tables(0));
1016
1017 extra16 = NULL;
1018 if (re16) {
1019 error = NULL;
1020 extra16 = pcre16_study(re16, study_mode, &error);
1021 if (!extra16) {
1022 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1023 pcre16_free(re16);
1024 re16 = NULL;
1025 }
1026 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1027 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1028 pcre16_free_study(extra16);
1029 pcre16_free(re16);
1030 re16 = NULL;
1031 }
1032 extra16->flags |= PCRE_EXTRA_MARK;
1033 } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1034 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
1035 #endif
1036
1037 counter++;
1038 if ((counter & 0x3) != 0) {
1039 #ifdef SUPPORT_PCRE8
1040 setstack8(NULL);
1041 #endif
1042 #ifdef SUPPORT_PCRE16
1043 setstack16(NULL);
1044 #endif
1045 }
1046
1047 #ifdef SUPPORT_PCRE8
1048 return_value8_1 = -1000;
1049 return_value8_2 = -1000;
1050 for (i = 0; i < 32; ++i)
1051 ovector8_1[i] = -2;
1052 for (i = 0; i < 32; ++i)
1053 ovector8_2[i] = -2;
1054 if (re8) {
1055 mark8_1 = NULL;
1056 mark8_2 = NULL;
1057 setstack8(extra8);
1058 extra8->mark = &mark8_1;
1059 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1060 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1061 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1062 dummy_extra8.flags = PCRE_EXTRA_MARK;
1063 dummy_extra8.mark = &mark8_2;
1064 return_value8_2 = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1065 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1066 }
1067 #endif
1068
1069 #ifdef SUPPORT_PCRE16
1070 return_value16_1 = -1000;
1071 return_value16_2 = -1000;
1072 for (i = 0; i < 32; ++i)
1073 ovector16_1[i] = -2;
1074 for (i = 0; i < 32; ++i)
1075 ovector16_2[i] = -2;
1076 if (re16) {
1077 mark16_1 = NULL;
1078 mark16_2 = NULL;
1079 setstack16(extra16);
1080 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1081 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1082 else
1083 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1084 extra16->mark = &mark16_1;
1085 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1086 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1087 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1088 dummy_extra16.flags = PCRE_EXTRA_MARK;
1089 dummy_extra16.mark = &mark16_2;
1090 return_value16_2 = pcre16_exec(re16, &dummy_extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1091 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1092 }
1093 #endif
1094
1095 /* printf("[%d-%d|%d-%d|%d-%d]%s", return_value8_1, return_value16_1, ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1096
1097 /* If F_DIFF is set, just run the test, but do not compare the results.
1098 Segfaults can still be captured. */
1099
1100 is_successful = 1;
1101 if (!(current->start_offset & F_DIFF)) {
1102 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1103 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1104 /* All results must be the same. */
1105 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1106 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1107 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1108 total, current->pattern, current->input);
1109 is_successful = 0;
1110 } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
1111 if (return_value8_1 == PCRE_ERROR_PARTIAL) {
1112 return_value8_1 = 2;
1113 return_value16_1 = 2;
1114 } else {
1115 return_value8_1 *= 2;
1116 return_value16_1 *= 2;
1117 }
1118
1119 /* Transform back the results. */
1120 if (current->flags & PCRE_UTF8) {
1121 for (i = 0; i < return_value8_1; ++i) {
1122 if (ovector16_1[i] >= 0)
1123 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1124 if (ovector16_2[i] >= 0)
1125 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1126 }
1127 }
1128
1129 for (i = 0; i < return_value8_1; ++i)
1130 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1131 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1132 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1133 total, current->pattern, current->input);
1134 is_successful = 0;
1135 }
1136 }
1137 } else {
1138 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1139 /* Only the 8 bit and 16 bit results must be equal. */
1140 #ifdef SUPPORT_PCRE8
1141 if (return_value8_1 != return_value8_2) {
1142 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1143 return_value8_1, return_value8_2, total, current->pattern, current->input);
1144 is_successful = 0;
1145 } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
1146 if (return_value8_1 == PCRE_ERROR_PARTIAL)
1147 return_value8_1 = 2;
1148 else
1149 return_value8_1 *= 2;
1150
1151 for (i = 0; i < return_value8_1; ++i)
1152 if (ovector8_1[i] != ovector8_2[i]) {
1153 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1154 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1155 is_successful = 0;
1156 }
1157 }
1158 #endif
1159
1160 #ifdef SUPPORT_PCRE16
1161 if (return_value16_1 != return_value16_2) {
1162 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1163 return_value16_1, return_value16_2, total, current->pattern, current->input);
1164 is_successful = 0;
1165 } else if (return_value16_1 >= 0 || return_value16_1 == PCRE_ERROR_PARTIAL) {
1166 if (return_value16_1 == PCRE_ERROR_PARTIAL)
1167 return_value16_1 = 2;
1168 else
1169 return_value16_1 *= 2;
1170
1171 for (i = 0; i < return_value16_1; ++i)
1172 if (ovector16_1[i] != ovector16_2[i]) {
1173 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1174 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1175 is_successful = 0;
1176 }
1177 }
1178 #endif
1179
1180 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1181 }
1182 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1183 }
1184
1185 if (is_successful) {
1186 #ifdef SUPPORT_PCRE8
1187 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1188 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1189 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1190 total, current->pattern, current->input);
1191 is_successful = 0;
1192 }
1193
1194 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1195 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1196 total, current->pattern, current->input);
1197 is_successful = 0;
1198 }
1199 }
1200 #endif
1201 #ifdef SUPPORT_PCRE16
1202 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1203 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1204 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1205 total, current->pattern, current->input);
1206 is_successful = 0;
1207 }
1208
1209 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1210 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1211 total, current->pattern, current->input);
1212 is_successful = 0;
1213 }
1214 }
1215 #endif
1216 }
1217
1218 if (is_successful) {
1219 #ifdef SUPPORT_PCRE8
1220 if (mark8_1 != mark8_2) {
1221 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1222 total, current->pattern, current->input);
1223 is_successful = 0;
1224 }
1225 #endif
1226 #ifdef SUPPORT_PCRE16
1227 if (mark16_1 != mark16_2) {
1228 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1229 total, current->pattern, current->input);
1230 is_successful = 0;
1231 }
1232 #endif
1233 }
1234
1235 #ifdef SUPPORT_PCRE8
1236 if (re8) {
1237 pcre_free_study(extra8);
1238 pcre_free(re8);
1239 }
1240 #endif
1241 #ifdef SUPPORT_PCRE16
1242 if (re16) {
1243 pcre16_free_study(extra16);
1244 pcre16_free(re16);
1245 }
1246 #endif
1247
1248 if (is_successful) {
1249 successful++;
1250 successful_row++;
1251 printf(".");
1252 if (successful_row >= 60) {
1253 successful_row = 0;
1254 printf("\n");
1255 }
1256 } else
1257 successful_row = 0;
1258
1259 fflush(stdout);
1260 current++;
1261 }
1262 tables(1);
1263 #ifdef SUPPORT_PCRE8
1264 setstack8(NULL);
1265 #endif
1266 #ifdef SUPPORT_PCRE16
1267 setstack16(NULL);
1268 #endif
1269
1270 if (total == successful) {
1271 printf("\nAll JIT regression tests are successfully passed.\n");
1272 return 0;
1273 } else {
1274 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1275 return 1;
1276 }
1277 }
1278
1279 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5