/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 911 - (show annotations)
Fri Feb 10 08:05:30 2012 UTC (7 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 49971 byte(s)
Error occurred while calculating annotation data.
Fixing a wrong JIT test case
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
/* Flag value occupying bit 31, well clear of OFFSET_MASK and the F_* bits
   (0x010000 .. 0x200000) defined further down.
   NOTE(review): nothing in the visible part of this file references it;
   presumably it marks cases affected by a known PCRE bug - confirm. */
#define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
85 static int regression_tests(void);
86
87 int main(void)
88 {
89 int jit = 0;
90 #ifdef SUPPORT_PCRE8
91 pcre_config(PCRE_CONFIG_JIT, &jit);
92 #else
93 pcre16_config(PCRE_CONFIG_JIT, &jit);
94 #endif
95 if (!jit) {
96 printf("JIT must be enabled to run pcre_jit_test\n");
97 return 1;
98 }
99 return regression_tests();
100 }
101
102 /* --------------------------------------------------------------------------------------- */
103
104 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106 #endif
107
/* Shorthand option sets used as the first field of the regression test
   cases. The letters in each name spell out the PCRE options it combines:

     M = PCRE_MULTILINE         A = PCRE_NEWLINE_ANYCRLF
     U = PCRE_UTF8              C = PCRE_CASELESS
     P = PCRE_UCP

   Every set contains MA, so the others are expressed in terms of it; the
   resulting values are the same as OR-ing the individual options. */
#define MA    (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP   (MA | PCRE_UCP)
#define CMA   (PCRE_CASELESS | MA)
#define MUA   (MA | PCRE_UTF8)
#define MUAP  (MUA | PCRE_UCP)
#define CMUA  (PCRE_CASELESS | MUA)
#define CMUAP (PCRE_CASELESS | MUAP)
115
/* The second field of each regression test case carries a start offset in
   its low 16 bits (OFFSET_MASK) with per-case F_* flags OR-ed in above them,
   e.g. "1 | F_NOMATCH".

   NOTE(review): the code that interprets these flags is outside the visible
   part of the file; the descriptions below are based on how the flags are
   used in the test table and on their names - confirm before relying on
   them. */
#define OFFSET_MASK 0x00ffff
#define F_NO8       0x010000 /* presumably: not run against the 8-bit library */
#define F_NO16      0x020000 /* presumably: not run against the 16-bit library */
#define F_NOMATCH   0x040000 /* set on cases like "^ab" vs "aab"; presumably "no match expected" */
#define F_DIFF      0x080000 /* meaning not visible here - TODO confirm */
#define F_FORCECONV 0x100000 /* set on the "16 bit specific tests" cases */
#define F_PROPERTY  0x200000 /* set on the cases whose pattern uses \p, \P or \X */
123
/* One entry of the regression test table. The table is written with
   positional initialisers ({ flags, start_offset, pattern, input }), so the
   field order below must not change. */
struct regression_test_case {
	/* PCRE option bits for this case, e.g. MUA or PCRE_CASELESS. */
	int flags;
	/* Start offset with F_* flags OR-ed in (e.g. "1 | F_NOMATCH");
	   presumably split apart again with OFFSET_MASK - confirm in the
	   code that consumes the table. */
	int start_offset;
	/* Regular expression, as a C string. */
	const char *pattern;
	/* Text the table pairs with the pattern. */
	const char *input;
};
130
131 static struct regression_test_case regression_test_cases[] = {
132 /* Constant strings. */
133 { MUA, 0, "AbC", "AbAbC" },
134 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136 { MA, 0, "[^a]", "aAbB" },
137 { CMA, 0, "[^m]", "mMnN" },
138 { MA, 0, "a[^b][^#]", "abacd" },
139 { CMA, 0, "A[^B][^E]", "abacd" },
140 { CMUA, 0, "[^x][^#]", "XxBll" },
141 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147 { MUA, 0, "[axd]", "sAXd" },
148 { CMUA, 0, "[axd]", "sAXd" },
149 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159 { PCRE_CASELESS, 0, "a1", "Aa1" },
160 { MA, 0, "\\Ca", "cda" },
161 { CMA, 0, "\\Ca", "CDA" },
162 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168
169 /* Assertions. */
170 { MUA, 0, "\\b[^A]", "A_B#" },
171 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173 { MAP, 0, "\\B", "_\xa1" },
174 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182 { MA, 1 | F_NOMATCH, "^", "\n" },
183 { 0, 0, "^ab", "ab" },
184 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190 { 0, 0, "ab$", "ab" },
191 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
192 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
193 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
197 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
198 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
199 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
200 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
201 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
202 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
203 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
204 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
205 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
206 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
207 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
208 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
212 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
213 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
214 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
218 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
219 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
220 { MA, 0, "\\Aa", "aaa" },
221 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
222 { MA, 1, "\\Ga", "aaa" },
223 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
224 { MA, 0, "a\\z", "aaa" },
225 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
226
227 /* Brackets. */
228 { MUA, 0, "(ab|bb|cd)", "bacde" },
229 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
230 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
231 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
232 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
233 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
234
235 /* Greedy and non-greedy ? operators. */
236 { MUA, 0, "(?:a)?a", "laab" },
237 { CMUA, 0, "(A)?A", "llaab" },
238 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
239 { MUA, 0, "(a)?a", "manm" },
240 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
241 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
242 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
243
244 /* Greedy and non-greedy + operators */
245 { MUA, 0, "(aa)+aa", "aaaaaaa" },
246 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
247 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
248 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
249 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
251 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
252
253 /* Greedy and non-greedy * operators */
254 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
255 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
256 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
257 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
258 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
259 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
260 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
261 { MA, 0, "((?:a|)*){0}a", "a" },
262
263 /* Combining ? + * operators */
264 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
265 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
267 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
268 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
269
270 /* Single character iterators. */
271 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
272 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
273 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
274 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
275 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
276 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
277 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
278 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
279 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
280 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
281 { MUA, 0, "(a?+[^b])+", "babaacacb" },
282 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
283 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
284 { CMUA, 0, "[c-f]+k", "DemmFke" },
285 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
286 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
287 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
288 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
289 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
290 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
291 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
292 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
293 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
294 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
295 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
296 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
297 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
298 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
299 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
300 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
302 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
303
304 /* Basic character sets. */
305 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
306 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
307 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
308 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
309 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
310 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
311
312 /* Unicode properties. */
313 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
314 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
315 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
316 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
317 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
318 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
320 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
321 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
323 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
324 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
325 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
326 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
327 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
328 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
329 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
330 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
331 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
332 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
333
334 /* Possible empty brackets. */
335 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
336 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
337 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
338 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
339 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
340 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
341 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
342 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
343 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
344 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
345
346 /* Start offset. */
347 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
348 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
350 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
351
352 /* Newline. */
353 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
355 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
356
357 /* Any character except newline or any newline. */
358 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
359 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
360 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
362 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
363 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
364 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
365 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
366 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
367 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
368 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
369 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
370 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
371 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
372 { MUA, 0, "\\R+", "ab\r\n\r" },
373 { MUA, 0, "\\R*", "ab\r\n\r" },
374 { MUA, 0, "\\R*", "\r\n\r" },
375 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
376 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
377 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
378 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
379 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
380 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
381 { MUA, 0, "\\R*\\R\\R", "\n\r" },
382 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
383 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
384
385 /* Atomic groups (no fallback from "next" direction). */
386 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
387 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
388 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
389 "bababcdedefgheijijklmlmnop" },
390 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
391 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
392 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
393 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
394 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
395 { MUA, 0, "(?>x|)*$", "aaa" },
396 { MUA, 0, "(?>(x)|)*$", "aaa" },
397 { MUA, 0, "(?>x|())*$", "aaa" },
398 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
399 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
400 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
401 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
402 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
403 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
404 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
405 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
406 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
407 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
408 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
409 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
410 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
411 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
412 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
413 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
415 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
416 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
417 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
418 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
419 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
420 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
421 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
422 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
423 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
424
425 /* Possessive quantifiers. */
426 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
427 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
428 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
429 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
430 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
431 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
432 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
433 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
434 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
435 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
436 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
437 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
438 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
439 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
440 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
441 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
442 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
443 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
444 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
445 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
446 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
447 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
448 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
449 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
450 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
451 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
452 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
453 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
454 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
455 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
456 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
457 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
458 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
459 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
461
462 /* Back references. */
463 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
464 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
465 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
466 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
467 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
468 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
470 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
471 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
472 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
473 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
474 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
475 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
477 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
478 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
479 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
480 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
481 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
482 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
483 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
484 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
485 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
486
487 /* Assertions. */
488 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
489 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
490 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
491 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
492 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
493 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
494 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
495 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
496 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
497 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
498 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
499 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
500 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
501 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
502 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
503 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
504 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
505 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
506 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
508 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
509 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
510 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
511 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
512
513 /* Not empty, ACCEPT, FAIL */
514 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
516 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
517 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
518 { MUA, 0, "a(*ACCEPT)b", "ab" },
519 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
520 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
521 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
522 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
523 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
524 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
525 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
526 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
527 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
528 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
529 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
530 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
531 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
532 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
533
534 /* Conditional blocks. */
535 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
536 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
537 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
538 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
539 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
541 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
543 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
545 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?=a)ab)", "a" },
548 { MUA, 0, "(?(?<!b)c)", "b" },
549 { MUA, 0, "(?(DEFINE)a(b))", "a" },
550 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
551 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
552 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
553 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
554 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
555 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
556 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
557 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
560 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
561 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
562 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
563 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
564 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
566 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
567 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
568 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
569
570 /* Set start of match. */
571 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
572 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
573 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
574 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
575 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
576
577 /* First line. */
578 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
579 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
580 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
581 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
582 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
583 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
584 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
585 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
586 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
587 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
589 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
590 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
591 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
592 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
594 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
595
596 /* Recurse. */
597 { MUA, 0, "(a)(?1)", "aa" },
598 { MUA, 0, "((a))(?1)", "aa" },
599 { MUA, 0, "(b|a)(?1)", "aa" },
600 { MUA, 0, "(b|(a))(?1)", "aa" },
601 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
602 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
603 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
604 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
605 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
606 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
607 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
608 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
609 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
610 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
611 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
612 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
613 { MUA, 0, "b|<(?R)*>", "<<b>" },
614 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
615 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
616 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
617 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
618 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
619 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
620 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
621 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
622
623 /* 16 bit specific tests. */
624 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
625 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
626 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
627 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
628 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
629 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
630 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
632 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
634 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
635 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
636 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
637 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
638 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
639 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
640 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
642 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
643 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
645 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
646 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
647 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
648 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
649 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
650 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
651 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
652 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
653 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
654 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
655
656 /* Deep recursion. */
657 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
658 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
659 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
660
661 /* Deep recursion: Stack limit reached. */
662 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
663 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
664 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
665 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
666 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
667
668 { 0, 0, NULL, NULL }
669 };
670
671 static const unsigned char *tables(int mode)
672 {
673 /* The purpose of this function to allow valgrind
674 for reporting invalid reads and writes. */
675 static unsigned char *tables_copy;
676 const char *errorptr;
677 int erroroffset;
678 unsigned char *default_tables;
679 #ifdef SUPPORT_PCRE8
680 pcre *regex;
681 char null_str[1] = { 0 };
682 #else
683 pcre16 *regex;
684 PCRE_UCHAR16 null_str[1] = { 0 };
685 #endif
686
687 if (mode) {
688 if (tables_copy)
689 free(tables_copy);
690 tables_copy = NULL;
691 return NULL;
692 }
693
694 if (tables_copy)
695 return tables_copy;
696
697 default_tables = NULL;
698 #ifdef SUPPORT_PCRE8
699 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
700 if (regex) {
701 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
702 pcre_free(regex);
703 }
704 #else
705 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
706 if (regex) {
707 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
708 pcre16_free(regex);
709 }
710 #endif
711 /* Shouldn't ever happen. */
712 if (!default_tables)
713 return NULL;
714
715 /* Unfortunately this value cannot get from pcre_fullinfo.
716 Since this is a test program, this is acceptable at the moment. */
717 tables_copy = (unsigned char *)malloc(1088);
718 if (!tables_copy)
719 return NULL;
720
721 memcpy(tables_copy, default_tables, 1088);
722 return tables_copy;
723 }
724
725 #ifdef SUPPORT_PCRE8
726 static pcre_jit_stack* callback8(void *arg)
727 {
728 return (pcre_jit_stack *)arg;
729 }
730 #endif
731
732 #ifdef SUPPORT_PCRE16
733 static pcre16_jit_stack* callback16(void *arg)
734 {
735 return (pcre16_jit_stack *)arg;
736 }
737 #endif
738
739 #ifdef SUPPORT_PCRE8
740 static void setstack8(pcre_extra *extra)
741 {
742 static pcre_jit_stack *stack;
743
744 if (!extra) {
745 if (stack)
746 pcre_jit_stack_free(stack);
747 stack = NULL;
748 return;
749 }
750
751 if (!stack)
752 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
753 /* Extra can be NULL. */
754 pcre_assign_jit_stack(extra, callback8, stack);
755 }
756 #endif /* SUPPORT_PCRE8 */
757
758 #ifdef SUPPORT_PCRE16
759 static void setstack16(pcre16_extra *extra)
760 {
761 static pcre16_jit_stack *stack;
762
763 if (!extra) {
764 if (stack)
765 pcre16_jit_stack_free(stack);
766 stack = NULL;
767 return;
768 }
769
770 if (!stack)
771 stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
772 /* Extra can be NULL. */
773 pcre16_assign_jit_stack(extra, callback16, stack);
774 }
775 #endif /* SUPPORT_PCRE16 */
776
777 #ifdef SUPPORT_PCRE16
778
779 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
780 {
781 unsigned char *iptr = (unsigned char*)input;
782 unsigned short *optr = (unsigned short *)output;
783 unsigned int c;
784
785 if (max_length == 0)
786 return 0;
787
788 while (*iptr && max_length > 1) {
789 c = 0;
790 if (offsetmap)
791 *offsetmap++ = (int)(iptr - (unsigned char*)input);
792
793 if (!(*iptr & 0x80))
794 c = *iptr++;
795 else if (!(*iptr & 0x20)) {
796 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
797 iptr += 2;
798 } else if (!(*iptr & 0x10)) {
799 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
800 iptr += 3;
801 } else if (!(*iptr & 0x08)) {
802 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
803 iptr += 4;
804 }
805
806 if (c < 65536) {
807 *optr++ = c;
808 max_length--;
809 } else if (max_length <= 2) {
810 *optr = '\0';
811 return (int)(optr - (unsigned short *)output);
812 } else {
813 c -= 0x10000;
814 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
815 *optr++ = 0xdc00 | (c & 0x3ff);
816 max_length -= 2;
817 if (offsetmap)
818 offsetmap++;
819 }
820 }
821 if (offsetmap)
822 *offsetmap = (int)(iptr - (unsigned char*)input);
823 *optr = '\0';
824 return (int)(optr - (unsigned short *)output);
825 }
826
827 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
828 {
829 unsigned char *iptr = (unsigned char*)input;
830 unsigned short *optr = (unsigned short *)output;
831
832 if (max_length == 0)
833 return 0;
834
835 while (*iptr && max_length > 1) {
836 *optr++ = *iptr++;
837 max_length--;
838 }
839 *optr = '\0';
840 return (int)(optr - (unsigned short *)output);
841 }
842
843 #define REGTEST_MAX_LENGTH 4096 /* Capacity of the two scratch arrays below; passed as max_length to the 8 -> 16 bit conversion helpers. */
844 static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH]; /* Receives the 16 bit form of the pattern, and later of the input, of the current test case. */
845 static int regtest_offsetmap[REGTEST_MAX_LENGTH]; /* Filled by convert_utf8_to_utf16 for the input; used to map 16 bit ovector values back to byte offsets. */
846
847 #endif /* SUPPORT_PCRE16 */
848
/* Returns 1 when every byte of the zero terminated string is in the
range 0..127, otherwise 0. An empty string counts as ASCII. */
static int check_ascii(const char *input)
{
	const unsigned char *p;

	for (p = (const unsigned char *)input; *p != 0; p++) {
		if (*p & 0x80)
			return 0;
	}
	return 1;
}
859
860 static int regression_tests(void)
861 {
862 struct regression_test_case *current = regression_test_cases;
863 const char *error;
864 char *cpu_info;
865 int i, err_offs;
866 int is_successful, is_ascii_pattern, is_ascii_input;
867 int total = 0;
868 int successful = 0;
869 int counter = 0;
870 #ifdef SUPPORT_PCRE8
871 pcre *re8;
872 pcre_extra *extra8;
873 int ovector8_1[32];
874 int ovector8_2[32];
875 int return_value8_1, return_value8_2;
876 int utf8 = 0, ucp8 = 0;
877 int disabled_flags8 = 0;
878 #endif
879 #ifdef SUPPORT_PCRE16
880 pcre16 *re16;
881 pcre16_extra *extra16;
882 int ovector16_1[32];
883 int ovector16_2[32];
884 int return_value16_1, return_value16_2;
885 int utf16 = 0, ucp16 = 0;
886 int disabled_flags16 = 0;
887 int length16;
888 #endif
889
890 /* This test compares the behaviour of interpreter and JIT. Although disabling
891 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
892 still considered successful from pcre_jit_test point of view. */
893
894 #ifdef SUPPORT_PCRE8
895 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
896 #else
897 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
898 #endif
899
900 printf("Running JIT regression tests\n");
901 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
902
903 #ifdef SUPPORT_PCRE8
904 pcre_config(PCRE_CONFIG_UTF8, &utf8);
905 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
906 if (!utf8)
907 disabled_flags8 |= PCRE_UTF8;
908 if (!ucp8)
909 disabled_flags8 |= PCRE_UCP;
910 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
911 #endif
912 #ifdef SUPPORT_PCRE16
913 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
914 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
915 if (!utf16)
916 disabled_flags16 |= PCRE_UTF8;
917 if (!ucp16)
918 disabled_flags16 |= PCRE_UCP;
919 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
920 #endif
921
922 while (current->pattern) {
923 /* printf("\nPattern: %s :\n", current->pattern); */
924 total++;
925 if (current->start_offset & F_PROPERTY) {
926 is_ascii_pattern = 0;
927 is_ascii_input = 0;
928 } else {
929 is_ascii_pattern = check_ascii(current->pattern);
930 is_ascii_input = check_ascii(current->input);
931 }
932
933 error = NULL;
934 #ifdef SUPPORT_PCRE8
935 re8 = NULL;
936 if (!(current->start_offset & F_NO8))
937 re8 = pcre_compile(current->pattern,
938 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
939 &error, &err_offs, tables(0));
940
941 extra8 = NULL;
942 if (re8) {
943 error = NULL;
944 extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
945 if (!extra8) {
946 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
947 pcre_free(re8);
948 re8 = NULL;
949 }
950 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
951 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
952 pcre_free_study(extra8);
953 pcre_free(re8);
954 re8 = NULL;
955 }
956 } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
957 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
958 #endif
959 #ifdef SUPPORT_PCRE16
960 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
961 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
962 else
963 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
964
965 re16 = NULL;
966 if (!(current->start_offset & F_NO16))
967 re16 = pcre16_compile(regtest_buf,
968 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
969 &error, &err_offs, tables(0));
970
971 extra16 = NULL;
972 if (re16) {
973 error = NULL;
974 extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
975 if (!extra16) {
976 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
977 pcre16_free(re16);
978 re16 = NULL;
979 }
980 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
981 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
982 pcre16_free_study(extra16);
983 pcre16_free(re16);
984 re16 = NULL;
985 }
986 } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
987 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
988 #endif
989
990 counter++;
991 if ((counter & 0x3) != 0) {
992 #ifdef SUPPORT_PCRE8
993 setstack8(NULL);
994 #endif
995 #ifdef SUPPORT_PCRE16
996 setstack16(NULL);
997 #endif
998 }
999
1000 #ifdef SUPPORT_PCRE8
1001 return_value8_1 = -1000;
1002 return_value8_2 = -1000;
1003 for (i = 0; i < 32; ++i)
1004 ovector8_1[i] = -2;
1005 for (i = 0; i < 32; ++i)
1006 ovector8_2[i] = -2;
1007 if (re8) {
1008 setstack8(extra8);
1009 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1010 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
1011 return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1012 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
1013 }
1014 #endif
1015
1016 #ifdef SUPPORT_PCRE16
1017 return_value16_1 = -1000;
1018 return_value16_2 = -1000;
1019 for (i = 0; i < 32; ++i)
1020 ovector16_1[i] = -2;
1021 for (i = 0; i < 32; ++i)
1022 ovector16_2[i] = -2;
1023 if (re16) {
1024 setstack16(extra16);
1025 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1026 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1027 else
1028 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1029 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1030 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
1031 return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1032 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
1033 }
1034 #endif
1035
1036 /* If F_DIFF is set, just run the test, but do not compare the results.
1037 Segfaults can still be captured. */
1038
1039 is_successful = 1;
1040 if (!(current->start_offset & F_DIFF)) {
1041 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1042 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1043 /* All results must be the same. */
1044 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1045 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1046 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1047 total, current->pattern, current->input);
1048 is_successful = 0;
1049 } else if (return_value8_1 >= 0) {
1050 return_value8_1 *= 2;
1051 /* Transform back the results. */
1052 if (current->flags & PCRE_UTF8) {
1053 for (i = 0; i < return_value8_1; ++i) {
1054 if (ovector16_1[i] >= 0)
1055 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1056 if (ovector16_2[i] >= 0)
1057 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1058 }
1059 }
1060
1061 for (i = 0; i < return_value8_1; ++i)
1062 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1063 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1064 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1065 total, current->pattern, current->input);
1066 is_successful = 0;
1067 }
1068 }
1069 } else {
1070 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1071 /* Only the 8 bit and 16 bit results must be equal. */
1072 #ifdef SUPPORT_PCRE8
1073 if (return_value8_1 != return_value8_2) {
1074 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1075 return_value8_1, return_value8_2, total, current->pattern, current->input);
1076 is_successful = 0;
1077 } else if (return_value8_1 >= 0) {
1078 return_value8_1 *= 2;
1079 for (i = 0; i < return_value8_1; ++i)
1080 if (ovector8_1[i] != ovector8_2[i]) {
1081 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1082 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1083 is_successful = 0;
1084 }
1085 }
1086 #endif
1087
1088 #ifdef SUPPORT_PCRE16
1089 if (return_value16_1 != return_value16_2) {
1090 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1091 return_value16_1, return_value16_2, total, current->pattern, current->input);
1092 is_successful = 0;
1093 } else if (return_value16_1 >= 0) {
1094 return_value16_1 *= 2;
1095 for (i = 0; i < return_value16_1; ++i)
1096 if (ovector16_1[i] != ovector16_2[i]) {
1097 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1098 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1099 is_successful = 0;
1100 }
1101 }
1102 #endif
1103
1104 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1105 }
1106 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1107 }
1108
1109 if (is_successful) {
1110 #ifdef SUPPORT_PCRE8
1111 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1112 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1113 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1114 total, current->pattern, current->input);
1115 is_successful = 0;
1116 }
1117
1118 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1119 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1120 total, current->pattern, current->input);
1121 is_successful = 0;
1122 }
1123 }
1124 #endif
1125 #ifdef SUPPORT_PCRE16
1126 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1127 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1128 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1129 total, current->pattern, current->input);
1130 is_successful = 0;
1131 }
1132
1133 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1134 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1135 total, current->pattern, current->input);
1136 is_successful = 0;
1137 }
1138 }
1139 #endif
1140 }
1141
1142 if (is_successful)
1143 successful++;
1144
1145 #ifdef SUPPORT_PCRE8
1146 if (re8) {
1147 pcre_free_study(extra8);
1148 pcre_free(re8);
1149 }
1150 #endif
1151 #ifdef SUPPORT_PCRE16
1152 if (re16) {
1153 pcre16_free_study(extra16);
1154 pcre16_free(re16);
1155 }
1156 #endif
1157
1158 /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1159 printf(".");
1160 fflush(stdout);
1161 current++;
1162 }
1163 tables(1);
1164 #ifdef SUPPORT_PCRE8
1165 setstack8(NULL);
1166 #endif
1167 #ifdef SUPPORT_PCRE16
1168 setstack16(NULL);
1169 #endif
1170
1171 if (total == successful) {
1172 printf("\nAll JIT regression tests are successfully passed.\n");
1173 return 0;
1174 } else {
1175 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1176 return 1;
1177 }
1178 }
1179
1180 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5