/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1245 - (show annotations)
Sat Feb 9 11:30:51 2013 UTC (6 years, 6 months ago) by zherczeg
File MIME type: text/plain
File size: 65461 byte(s)
Adding experimental support for callouts in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
54 #define PCRE_BUG 0x80000000 /* NOTE(review): not referenced in the visible part of this file - confirm where (and whether) it is consumed. */
55
56 /*
57 Letter characters:
58 \xe6\x92\xad = 0x64ad = 25773 (kanji)
59 Non-letter characters:
60 \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
61 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64 Newlines:
65 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
66 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67 Othercase pairs:
68 \xc3\xa9 = 0xe9 = 233 (e')
69 \xc3\x89 = 0xc9 = 201 (E')
70 \xc3\xa1 = 0xe1 = 225 (a')
71 \xc3\x81 = 0xc1 = 193 (A')
72 \xc8\xba = 0x23a = 570
73 \xe2\xb1\xa5 = 0x2c65 = 11365
74 \xe1\xbd\xb8 = 0x1f78 = 8056
75 \xe1\xbf\xb8 = 0x1ff8 = 8184
76 \xf0\x90\x90\x80 = 0x10400 = 66560
77 \xf0\x90\x90\xa8 = 0x10428 = 66600
78 Mark property:
79 \xcc\x8d = 0x30d = 781
80 Special:
81 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86 */
87
88 static int regression_tests(void); /* Forward declaration; the definition is not in this part of the file. main() returns its result unchanged. */
89
90 int main(void) /* Entry point: query the library's JIT configuration and run the regression tests only if it is non-zero. */
91 {
92 int jit = 0; /* Stays 0 if none of the config calls below is compiled in. */
93 #if defined SUPPORT_PCRE8 /* #if/#elif chain: only the first enabled library (8, then 16, then 32 bit) is queried. */
94 pcre_config(PCRE_CONFIG_JIT, &jit); /* The return value of the config call is ignored. */
95 #elif defined SUPPORT_PCRE16
96 pcre16_config(PCRE_CONFIG_JIT, &jit);
97 #elif defined SUPPORT_PCRE32
98 pcre32_config(PCRE_CONFIG_JIT, &jit);
99 #endif
100 if (!jit) {
101 printf("JIT must be enabled to run pcre_jit_test\n");
102 return 1; /* Non-zero exit status: the tests were not run. */
103 }
104 return regression_tests();
105 }
106
107 /* --------------------------------------------------------------------------------------- */
108
109 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
110 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111 #endif /* Build guard: compilation stops unless at least one of the three SUPPORT_PCRE* macros is defined. */
112
113 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF) /* Option-set shorthands used in the test table. Letters: C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8, A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
114 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP) /* MUA plus PCRE_UCP. */
115 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF) /* MUA plus PCRE_CASELESS. */
116 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP) /* MUA plus PCRE_CASELESS and PCRE_UCP. */
117 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF) /* Like MUA but without PCRE_UTF8. */
118 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP) /* MA plus PCRE_UCP. */
119 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF) /* MA plus PCRE_CASELESS. */
120
121 #define OFFSET_MASK 0x00ffff /* Low 16 bits. The test table ORs the F_* bits below into the start_offset field (e.g. "1 | F_NOMATCH"), so this mask presumably extracts the plain offset - confirm against the test runner. */
122 #define F_NO8 0x010000 /* Not used in the visible table entries; presumably excludes a case from the 8 bit run - TODO confirm. */
123 #define F_NO16 0x020000 /* Not used in the visible table entries; presumably excludes a case from the 16 bit run - TODO confirm. */
124 #define F_NO32 0x020000 /* NOTE(review): same value as F_NO16, so the two flags cannot be set or tested independently - confirm this aliasing is intended. */
125 #define F_NOMATCH 0x040000 /* Set on table entries that appear to be expected not to match (e.g. "[^axd]" caseless against "DxA"). */
126 #define F_DIFF 0x080000 /* Set on a few conditional-block cases in the table; exact meaning is not visible here - TODO confirm. */
127 #define F_FORCECONV 0x100000 /* Set on the "16 bit specific tests" entries, which use multi-byte sequences without PCRE_UTF8; presumably forces input conversion - TODO confirm. */
128 #define F_PROPERTY 0x200000 /* Set on visible table entries whose patterns use \p, \P or \X. */
129 #define F_STUDY 0x400000 /* Not used in the visible table entries; meaning not visible here - TODO confirm. */
130
131 struct regression_test_case { /* One row of the regression test table: options, start offset plus test flags, pattern and subject. */
132 int flags; /* PCRE option bits; table entries use the MUA/CMUA/... shorthands or raw PCRE_* options. */
133 int start_offset; /* Start offset in the low bits, OR-ed with F_* test flags in the table (e.g. "0 | F_NOMATCH"). */
134 const char *pattern; /* Regular expression, written as a C string literal (UTF-8 bytes spelled as \x escapes). */
135 const char *input; /* Subject string the pattern is run against. */
136 };
137
138 static struct regression_test_case regression_test_cases[] = {
139 /* Constant strings. */
140 { MUA, 0, "AbC", "AbAbC" },
141 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
142 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
143 { MA, 0, "[^a]", "aAbB" },
144 { CMA, 0, "[^m]", "mMnN" },
145 { MA, 0, "a[^b][^#]", "abacd" },
146 { CMA, 0, "A[^B][^E]", "abacd" },
147 { CMUA, 0, "[^x][^#]", "XxBll" },
148 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
149 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
150 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
151 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
152 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
153 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
154 { MUA, 0, "[axd]", "sAXd" },
155 { CMUA, 0, "[axd]", "sAXd" },
156 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
157 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
158 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
159 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
161 { MUA, 0, "[^a]", "\xc2\x80[]" },
162 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
163 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
164 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
165 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
166 { PCRE_CASELESS, 0, "a1", "Aa1" },
167 { MA, 0, "\\Ca", "cda" },
168 { CMA, 0, "\\Ca", "CDA" },
169 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
170 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
171 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
174 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
175
176 /* Assertions. */
177 { MUA, 0, "\\b[^A]", "A_B#" },
178 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
179 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
180 { MAP, 0, "\\B", "_\xa1" },
181 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
182 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
183 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
184 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
185 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
186 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
187 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
188 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
189 { MA, 1 | F_NOMATCH, "^", "\n" },
190 { 0, 0, "^ab", "ab" },
191 { 0, 0 | F_NOMATCH, "^ab", "aab" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
197 { 0, 0, "ab$", "ab" },
198 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
199 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
200 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
201 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
202 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
203 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
204 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
205 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
206 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
207 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
208 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
209 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
210 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
211 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
212 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
213 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
214 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
215 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
218 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
219 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
220 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
225 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
226 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
227 { MA, 0, "\\Aa", "aaa" },
228 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
229 { MA, 1, "\\Ga", "aaa" },
230 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
231 { MA, 0, "a\\z", "aaa" },
232 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
233
234 /* Brackets. */
235 { MUA, 0, "(ab|bb|cd)", "bacde" },
236 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
237 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
238 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
239 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
240 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
241
242 /* Greedy and non-greedy ? operators. */
243 { MUA, 0, "(?:a)?a", "laab" },
244 { CMUA, 0, "(A)?A", "llaab" },
245 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
246 { MUA, 0, "(a)?a", "manm" },
247 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
248 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
249 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
250
251 /* Greedy and non-greedy + operators */
252 { MUA, 0, "(aa)+aa", "aaaaaaa" },
253 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
254 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
255 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
256 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
258 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
259
260 /* Greedy and non-greedy * operators */
261 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
262 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
263 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
264 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
265 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
266 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
267 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
268 { MA, 0, "((?:a|)*){0}a", "a" },
269
270 /* Combining ? + * operators */
271 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
272 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
274 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
275 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
276
277 /* Single character iterators. */
278 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
279 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
280 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
281 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
282 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
283 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
284 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
285 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
286 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
287 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
288 { MUA, 0, "(a?+[^b])+", "babaacacb" },
289 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
290 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
291 { CMUA, 0, "[c-f]+k", "DemmFke" },
292 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
293 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
294 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
295 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
296 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
297 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
298 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
299 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
300 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
301 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
302 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
303 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
304 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
305 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
306 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
307 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
309 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
310
311 /* Basic character sets. */
312 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
313 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
314 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
315 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
316 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
317 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
318
319 /* Unicode properties. */
320 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
321 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
322 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
323 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
324 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
325 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
326 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
327 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
328 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
329 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
330 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
331 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
332 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
333 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
334 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
335 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
336 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
337 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
338 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
339 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
340
341 /* Possible empty brackets. */
342 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
343 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
344 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
345 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
346 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
347 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
348 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
349 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
350 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
351 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
352
353 /* Start offset. */
354 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
355 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
356 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
357 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
358
359 /* Newline. */
360 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
361 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
362 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
363
364 /* Any character except newline or any newline. */
365 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
366 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
367 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
368 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
369 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
370 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
371 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
372 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
373 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
374 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
375 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
376 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
377 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
378 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
379 { MUA, 0, "\\R+", "ab\r\n\r" },
380 { MUA, 0, "\\R*", "ab\r\n\r" },
381 { MUA, 0, "\\R*", "\r\n\r" },
382 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
383 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
384 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
385 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
386 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
387 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
388 { MUA, 0, "\\R*\\R\\R", "\n\r" },
389 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
390 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
391
392 /* Atomic groups (no fallback from "next" direction). */
393 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
394 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
395 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
396 "bababcdedefgheijijklmlmnop" },
397 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
398 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
399 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
400 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
401 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
402 { MUA, 0, "(?>x|)*$", "aaa" },
403 { MUA, 0, "(?>(x)|)*$", "aaa" },
404 { MUA, 0, "(?>x|())*$", "aaa" },
405 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
406 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
407 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
408 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
409 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
410 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
411 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
412 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
413 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
414 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
415 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
416 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
417 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
418 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
419 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
420 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
421 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
422 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
423 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
424 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
425 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
426 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
427 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
428 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
429 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
430 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
431
432 /* Possessive quantifiers. */
433 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
434 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
435 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
436 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
437 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
438 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
439 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
440 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
441 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
442 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
443 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
444 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
445 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
446 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
447 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
448 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
449 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
450 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
451 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
452 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
453 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
454 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
455 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
456 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
457 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
458 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
459 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
460 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
461 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
462 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
463 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
464 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
465 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
466 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
467 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
468
469 /* Back references. */
470 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
471 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
472 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
473 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
474 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
475 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
476 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
477 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
478 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
479 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
480 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
481 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
482 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
483 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
484 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
485 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
486 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
487 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
488 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
489 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
490 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
491 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
492 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
493
494 /* Assertions. */
495 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
496 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
497 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
498 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
499 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
500 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
501 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
502 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
503 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
504 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
505 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
506 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
507 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
508 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
509 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
510 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
511 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
512 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
513 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
514 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
515 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
516 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
517 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
518 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
519
520 /* Not empty, ACCEPT, FAIL */
521 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
522 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
523 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
524 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
525 { MUA, 0, "a(*ACCEPT)b", "ab" },
526 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
527 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
528 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
529 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
530 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
531 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
532 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
533 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
534 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
535 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
536 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
537 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
538 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
539 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
540
541 /* Conditional blocks. */
542 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
543 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
544 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
545 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
548 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
549 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
550 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
551 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
552 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
553 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
554 { MUA, 0, "(?(?=a)ab)", "a" },
555 { MUA, 0, "(?(?<!b)c)", "b" },
556 { MUA, 0, "(?(DEFINE)a(b))", "a" },
557 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
558 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
559 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
560 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
561 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
562 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
563 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
564 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
566 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
567 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
568 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
569 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
570 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
571 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
572 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
573 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
574 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
575 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
576 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
577
578 /* Set start of match. */
579 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
580 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
581 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
582 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
583 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
584
585 /* First line. */
586 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
587 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
588 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
589 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
590 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
591 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
592 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
593 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
594 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
595 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
596 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
597 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
598 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
599 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
600 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
601 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
602 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
603 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
604 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
605
606 /* Recurse. */
607 { MUA, 0, "(a)(?1)", "aa" },
608 { MUA, 0, "((a))(?1)", "aa" },
609 { MUA, 0, "(b|a)(?1)", "aa" },
610 { MUA, 0, "(b|(a))(?1)", "aa" },
611 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
612 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
613 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
614 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
615 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
616 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
617 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
618 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
619 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
620 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
621 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
622 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
623 { MUA, 0, "b|<(?R)*>", "<<b>" },
624 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
625 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
626 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
627 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
628 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
629 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
630 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
631 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
632
633 /* 16 bit specific tests. */
634 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
635 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
636 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
637 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
638 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
639 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
640 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
641 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
642 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
643 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
645 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
646 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
647 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
648 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
649 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
650 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
651 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
652 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
653 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
654 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
655 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
656 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
657 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
658 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
659 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
660 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
661 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
662 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
663 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
664 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
665
666 /* Partial matching. */
667 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
668 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
669 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
670 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
671 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
672 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
673 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
674 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
675
676 /* (*MARK) verb. */
677 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
678 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
679 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
680 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
681 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
682 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
683 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
684 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
685 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
686 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
687 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
688 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
689 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
690 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
691 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
692
693 /* (*COMMIT) verb. */
694 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
695 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
696 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
697 { MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" },
698 { MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" },
699
700 /* Deep recursion. */
701 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
702 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
703 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
704
705 /* Deep recursion: Stack limit reached. */
706 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
707 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
708 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
709 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
710 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
711
712 { 0, 0, NULL, NULL }
713 };
714
/* Returns a private heap copy of the library's default character tables.

   The purpose of this function is to let valgrind report invalid reads and
   writes on the tables: the copy lives in its own malloc'ed block.

   mode == 0: return the cached copy, creating it on the first call. Returns
              NULL when the default tables cannot be obtained or when the
              allocation fails.
   mode != 0: free the cached copy (if any) and return NULL. */
static const unsigned char *tables(int mode)
{
	/* Number of bytes copied from the default tables. Unfortunately this
	value cannot be obtained from pcre_fullinfo. Since this is a test
	program, a hard-coded size is acceptable at the moment. */
	enum { TABLES_COPY_SIZE = 1088 };
	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode) {
		/* free(NULL) is a no-op, so no guard is needed. */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy)
		return tables_copy;

	/* Compile an empty pattern with a NULL tables argument and ask the
	compiled pattern for the default tables pointer. */
	default_tables = NULL;
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre32_free(regex);
	}
#endif
	/* Shouldn't ever happen. */
	if (!default_tables)
		return NULL;

	tables_copy = (unsigned char *)malloc(TABLES_COPY_SIZE);
	if (!tables_copy)
		return NULL;

	memcpy(tables_copy, default_tables, TABLES_COPY_SIZE);
	return tables_copy;
}
777
778 #ifdef SUPPORT_PCRE8
779 static pcre_jit_stack* callback8(void *arg)
780 {
781 return (pcre_jit_stack *)arg;
782 }
783 #endif
784
785 #ifdef SUPPORT_PCRE16
786 static pcre16_jit_stack* callback16(void *arg)
787 {
788 return (pcre16_jit_stack *)arg;
789 }
790 #endif
791
792 #ifdef SUPPORT_PCRE32
793 static pcre32_jit_stack* callback32(void *arg)
794 {
795 return (pcre32_jit_stack *)arg;
796 }
797 #endif
798
799 #ifdef SUPPORT_PCRE8
800 static pcre_jit_stack *stack8;
801
802 static pcre_jit_stack *getstack8(void)
803 {
804 if (!stack8)
805 stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
806 return stack8;
807 }
808
809 static void setstack8(pcre_extra *extra)
810 {
811 if (!extra) {
812 if (stack8)
813 pcre_jit_stack_free(stack8);
814 stack8 = NULL;
815 return;
816 }
817
818 pcre_assign_jit_stack(extra, callback8, getstack8());
819 }
820 #endif /* SUPPORT_PCRE8 */
821
822 #ifdef SUPPORT_PCRE16
823 static pcre16_jit_stack *stack16;
824
825 static pcre16_jit_stack *getstack16(void)
826 {
827 if (!stack16)
828 stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
829 return stack16;
830 }
831
832 static void setstack16(pcre16_extra *extra)
833 {
834 if (!extra) {
835 if (stack16)
836 pcre16_jit_stack_free(stack16);
837 stack16 = NULL;
838 return;
839 }
840
841 pcre16_assign_jit_stack(extra, callback16, getstack16());
842 }
843 #endif /* SUPPORT_PCRE16 */
844
845 #ifdef SUPPORT_PCRE32
846 static pcre32_jit_stack *stack32;
847
848 static pcre32_jit_stack *getstack32(void)
849 {
850 if (!stack32)
851 stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
852 return stack32;
853 }
854
855 static void setstack32(pcre32_extra *extra)
856 {
857 if (!extra) {
858 if (stack32)
859 pcre32_jit_stack_free(stack32);
860 stack32 = NULL;
861 return;
862 }
863
864 pcre32_assign_jit_stack(extra, callback32, getstack32());
865 }
866 #endif /* SUPPORT_PCRE32 */
867
868 #ifdef SUPPORT_PCRE16
869
870 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
871 {
872 unsigned char *iptr = (unsigned char*)input;
873 PCRE_UCHAR16 *optr = output;
874 unsigned int c;
875
876 if (max_length == 0)
877 return 0;
878
879 while (*iptr && max_length > 1) {
880 c = 0;
881 if (offsetmap)
882 *offsetmap++ = (int)(iptr - (unsigned char*)input);
883
884 if (!(*iptr & 0x80))
885 c = *iptr++;
886 else if (!(*iptr & 0x20)) {
887 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
888 iptr += 2;
889 } else if (!(*iptr & 0x10)) {
890 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
891 iptr += 3;
892 } else if (!(*iptr & 0x08)) {
893 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
894 iptr += 4;
895 }
896
897 if (c < 65536) {
898 *optr++ = c;
899 max_length--;
900 } else if (max_length <= 2) {
901 *optr = '\0';
902 return (int)(optr - output);
903 } else {
904 c -= 0x10000;
905 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
906 *optr++ = 0xdc00 | (c & 0x3ff);
907 max_length -= 2;
908 if (offsetmap)
909 offsetmap++;
910 }
911 }
912 if (offsetmap)
913 *offsetmap = (int)(iptr - (unsigned char*)input);
914 *optr = '\0';
915 return (int)(optr - output);
916 }
917
918 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
919 {
920 unsigned char *iptr = (unsigned char*)input;
921 PCRE_UCHAR16 *optr = output;
922
923 if (max_length == 0)
924 return 0;
925
926 while (*iptr && max_length > 1) {
927 *optr++ = *iptr++;
928 max_length--;
929 }
930 *optr = '\0';
931 return (int)(optr - output);
932 }
933
/* Capacity, in elements, of the two 16 bit scratch arrays below. */
934 #define REGTEST_MAX_LENGTH16 4096
/* Scratch buffer that receives the 16 bit form of the current pattern and,
   later, of the current subject string. */
935 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* Offset map filled by convert_utf8_to_utf16() for the subject string; used
   to translate 16 bit ovector offsets back to byte offsets. */
936 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
937
938 #endif /* SUPPORT_PCRE16 */
939
940 #ifdef SUPPORT_PCRE32
941
942 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
943 {
944 unsigned char *iptr = (unsigned char*)input;
945 PCRE_UCHAR32 *optr = output;
946 unsigned int c;
947
948 if (max_length == 0)
949 return 0;
950
951 while (*iptr && max_length > 1) {
952 c = 0;
953 if (offsetmap)
954 *offsetmap++ = (int)(iptr - (unsigned char*)input);
955
956 if (!(*iptr & 0x80))
957 c = *iptr++;
958 else if (!(*iptr & 0x20)) {
959 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
960 iptr += 2;
961 } else if (!(*iptr & 0x10)) {
962 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
963 iptr += 3;
964 } else if (!(*iptr & 0x08)) {
965 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
966 iptr += 4;
967 }
968
969 *optr++ = c;
970 max_length--;
971 }
972 if (offsetmap)
973 *offsetmap = (int)(iptr - (unsigned char*)input);
974 *optr = 0;
975 return (int)(optr - output);
976 }
977
978 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
979 {
980 unsigned char *iptr = (unsigned char*)input;
981 PCRE_UCHAR32 *optr = output;
982
983 if (max_length == 0)
984 return 0;
985
986 while (*iptr && max_length > 1) {
987 *optr++ = *iptr++;
988 max_length--;
989 }
990 *optr = '\0';
991 return (int)(optr - output);
992 }
993
/* Capacity, in elements, of the two 32 bit scratch arrays below. */
994 #define REGTEST_MAX_LENGTH32 4096
/* Scratch buffer that receives the 32 bit form of the current pattern and,
   later, of the current subject string. */
995 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* Offset map filled by convert_utf8_to_utf32() for the subject string; used
   to translate 32 bit ovector offsets back to byte offsets. */
996 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
997
998 #endif /* SUPPORT_PCRE32 */
999
/* Returns 1 when every byte of the NUL terminated string is in the range
   0..127 (an empty string qualifies), otherwise 0. */
static int check_ascii(const char *input)
{
	/* Inspect the bytes as unsigned so values above 127 compare correctly;
	the cast keeps the const qualifier of the input. */
	const unsigned char *ptr = (const unsigned char *)input;

	while (*ptr) {
		if (*ptr > 127)
			return 0;
		ptr++;
	}
	return 1;
}
1010
1011 static int regression_tests(void)
1012 {
1013 struct regression_test_case *current = regression_test_cases;
1014 const char *error;
1015 char *cpu_info;
1016 int i, err_offs;
1017 int is_successful, is_ascii_pattern, is_ascii_input;
1018 int total = 0;
1019 int successful = 0;
1020 int successful_row = 0;
1021 int counter = 0;
1022 int study_mode;
1023 int utf = 0, ucp = 0;
1024 int disabled_flags = 0;
1025 #ifdef SUPPORT_PCRE8
1026 pcre *re8;
1027 pcre_extra *extra8;
1028 pcre_extra dummy_extra8;
1029 int ovector8_1[32];
1030 int ovector8_2[32];
1031 int return_value8[2];
1032 unsigned char *mark8_1, *mark8_2;
1033 #endif
1034 #ifdef SUPPORT_PCRE16
1035 pcre16 *re16;
1036 pcre16_extra *extra16;
1037 pcre16_extra dummy_extra16;
1038 int ovector16_1[32];
1039 int ovector16_2[32];
1040 int return_value16[2];
1041 PCRE_UCHAR16 *mark16_1, *mark16_2;
1042 int length16;
1043 #endif
1044 #ifdef SUPPORT_PCRE32
1045 pcre32 *re32;
1046 pcre32_extra *extra32;
1047 pcre32_extra dummy_extra32;
1048 int ovector32_1[32];
1049 int ovector32_2[32];
1050 int return_value32[2];
1051 PCRE_UCHAR32 *mark32_1, *mark32_2;
1052 int length32;
1053 #endif
1054
1055 /* This test compares the behaviour of interpreter and JIT. Although disabling
1056 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1057 still considered successful from pcre_jit_test point of view. */
1058
1059 #if defined SUPPORT_PCRE8
1060 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1061 #elif defined SUPPORT_PCRE16
1062 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1063 #elif defined SUPPORT_PCRE32
1064 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1065 #endif
1066
1067 printf("Running JIT regression tests\n");
1068 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1069
1070 #if defined SUPPORT_PCRE8
1071 pcre_config(PCRE_CONFIG_UTF8, &utf);
1072 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1073 #elif defined SUPPORT_PCRE16
1074 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1075 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1076 #elif defined SUPPORT_PCRE16
1077 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1078 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1079 #endif
1080
1081 if (!utf)
1082 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1083 if (!ucp)
1084 disabled_flags |= PCRE_UCP;
1085 #ifdef SUPPORT_PCRE8
1086 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1087 #endif
1088 #ifdef SUPPORT_PCRE16
1089 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1090 #endif
1091 #ifdef SUPPORT_PCRE32
1092 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1093 #endif
1094
1095 while (current->pattern) {
1096 /* printf("\nPattern: %s :\n", current->pattern); */
1097 total++;
1098 if (current->start_offset & F_PROPERTY) {
1099 is_ascii_pattern = 0;
1100 is_ascii_input = 0;
1101 } else {
1102 is_ascii_pattern = check_ascii(current->pattern);
1103 is_ascii_input = check_ascii(current->input);
1104 }
1105
1106 if (current->flags & PCRE_PARTIAL_SOFT)
1107 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1108 else if (current->flags & PCRE_PARTIAL_HARD)
1109 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1110 else
1111 study_mode = PCRE_STUDY_JIT_COMPILE;
1112 error = NULL;
1113 #ifdef SUPPORT_PCRE8
1114 re8 = NULL;
1115 if (!(current->start_offset & F_NO8))
1116 re8 = pcre_compile(current->pattern,
1117 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1118 &error, &err_offs, tables(0));
1119
1120 extra8 = NULL;
1121 if (re8) {
1122 error = NULL;
1123 extra8 = pcre_study(re8, study_mode, &error);
1124 if (!extra8) {
1125 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1126 pcre_free(re8);
1127 re8 = NULL;
1128 }
1129 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1130 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1131 pcre_free_study(extra8);
1132 pcre_free(re8);
1133 re8 = NULL;
1134 }
1135 extra8->flags |= PCRE_EXTRA_MARK;
1136 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1137 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1138 #endif
1139 #ifdef SUPPORT_PCRE16
1140 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1141 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1142 else
1143 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1144
1145 re16 = NULL;
1146 if (!(current->start_offset & F_NO16))
1147 re16 = pcre16_compile(regtest_buf16,
1148 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1149 &error, &err_offs, tables(0));
1150
1151 extra16 = NULL;
1152 if (re16) {
1153 error = NULL;
1154 extra16 = pcre16_study(re16, study_mode, &error);
1155 if (!extra16) {
1156 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1157 pcre16_free(re16);
1158 re16 = NULL;
1159 }
1160 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1161 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1162 pcre16_free_study(extra16);
1163 pcre16_free(re16);
1164 re16 = NULL;
1165 }
1166 extra16->flags |= PCRE_EXTRA_MARK;
1167 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1168 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1169 #endif
1170 #ifdef SUPPORT_PCRE32
1171 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1172 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1173 else
1174 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1175
1176 re32 = NULL;
1177 if (!(current->start_offset & F_NO32))
1178 re32 = pcre32_compile(regtest_buf32,
1179 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1180 &error, &err_offs, tables(0));
1181
1182 extra32 = NULL;
1183 if (re32) {
1184 error = NULL;
1185 extra32 = pcre32_study(re32, study_mode, &error);
1186 if (!extra32) {
1187 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1188 pcre32_free(re32);
1189 re32 = NULL;
1190 }
1191 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1192 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1193 pcre32_free_study(extra32);
1194 pcre32_free(re32);
1195 re32 = NULL;
1196 }
1197 extra32->flags |= PCRE_EXTRA_MARK;
1198 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1199 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1200 #endif
1201
1202 counter++;
1203 if ((counter & 0x3) != 0) {
1204 #ifdef SUPPORT_PCRE8
1205 setstack8(NULL);
1206 #endif
1207 #ifdef SUPPORT_PCRE16
1208 setstack16(NULL);
1209 #endif
1210 #ifdef SUPPORT_PCRE32
1211 setstack32(NULL);
1212 #endif
1213 }
1214
1215 #ifdef SUPPORT_PCRE8
1216 return_value8[0] = -1000;
1217 return_value8[1] = -1000;
1218 for (i = 0; i < 32; ++i)
1219 ovector8_1[i] = -2;
1220 for (i = 0; i < 32; ++i)
1221 ovector8_2[i] = -2;
1222 if (re8) {
1223 mark8_1 = NULL;
1224 mark8_2 = NULL;
1225 extra8->mark = &mark8_1;
1226
1227 if ((counter & 0x1) != 0) {
1228 setstack8(extra8);
1229 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1230 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1231 } else
1232 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1233 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
1234 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1235 dummy_extra8.flags = PCRE_EXTRA_MARK;
1236 if (current->start_offset & F_STUDY) {
1237 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1238 dummy_extra8.study_data = extra8->study_data;
1239 }
1240 dummy_extra8.mark = &mark8_2;
1241 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1242 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1243 }
1244 #endif
1245
1246 #ifdef SUPPORT_PCRE16
1247 return_value16[0] = -1000;
1248 return_value16[1] = -1000;
1249 for (i = 0; i < 32; ++i)
1250 ovector16_1[i] = -2;
1251 for (i = 0; i < 32; ++i)
1252 ovector16_2[i] = -2;
1253 if (re16) {
1254 mark16_1 = NULL;
1255 mark16_2 = NULL;
1256 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1257 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1258 else
1259 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1260 extra16->mark = &mark16_1;
1261 if ((counter & 0x1) != 0) {
1262 setstack16(extra16);
1263 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1264 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1265 } else
1266 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1267 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
1268 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1269 dummy_extra16.flags = PCRE_EXTRA_MARK;
1270 if (current->start_offset & F_STUDY) {
1271 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1272 dummy_extra16.study_data = extra16->study_data;
1273 }
1274 dummy_extra16.mark = &mark16_2;
1275 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1276 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1277 }
1278 #endif
1279
1280 #ifdef SUPPORT_PCRE32
1281 return_value32[0] = -1000;
1282 return_value32[1] = -1000;
1283 for (i = 0; i < 32; ++i)
1284 ovector32_1[i] = -2;
1285 for (i = 0; i < 32; ++i)
1286 ovector32_2[i] = -2;
1287 if (re32) {
1288 mark32_1 = NULL;
1289 mark32_2 = NULL;
1290 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1291 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1292 else
1293 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1294 extra32->mark = &mark32_1;
1295 if ((counter & 0x1) != 0) {
1296 setstack32(extra32);
1297 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1298 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1299 } else
1300 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1301 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
1302 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1303 dummy_extra32.flags = PCRE_EXTRA_MARK;
1304 if (current->start_offset & F_STUDY) {
1305 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1306 dummy_extra32.study_data = extra32->study_data;
1307 }
1308 dummy_extra32.mark = &mark32_2;
1309 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1310 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1311 }
1312 #endif
1313
1314 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1315 return_value8[0], return_value16[0],
1316 ovector8_1[0], ovector8_1[1],
1317 ovector16_1[0], ovector16_1[1],
1318 ovector32_1[0], ovector32_1[1],
1319 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1320
1321 /* If F_DIFF is set, just run the test, but do not compare the results.
1322 Segfaults can still be captured. */
1323
1324 is_successful = 1;
1325 if (!(current->start_offset & F_DIFF)) {
1326 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1327 if (!(current->start_offset & F_FORCECONV)) {
1328 int return_value;
1329
1330 /* All results must be the same. */
1331 #ifdef SUPPORT_PCRE8
1332 if ((return_value = return_value8[0]) != return_value8[1]) {
1333 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1334 return_value8[0], return_value8[1], total, current->pattern, current->input);
1335 is_successful = 0;
1336 } else
1337 #endif
1338 #ifdef SUPPORT_PCRE16
1339 if ((return_value = return_value16[0]) != return_value16[1]) {
1340 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1341 return_value16[0], return_value16[1], total, current->pattern, current->input);
1342 is_successful = 0;
1343 } else
1344 #endif
1345 #ifdef SUPPORT_PCRE32
1346 if ((return_value = return_value32[0]) != return_value32[1]) {
1347 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1348 return_value32[0], return_value32[1], total, current->pattern, current->input);
1349 is_successful = 0;
1350 } else
1351 #endif
1352 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1353 if (return_value8[0] != return_value16[0]) {
1354 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1355 return_value8[0], return_value16[0],
1356 total, current->pattern, current->input);
1357 is_successful = 0;
1358 } else
1359 #endif
1360 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1361 if (return_value8[0] != return_value32[0]) {
1362 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1363 return_value8[0], return_value32[0],
1364 total, current->pattern, current->input);
1365 is_successful = 0;
1366 } else
1367 #endif
1368 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1369 if (return_value16[0] != return_value32[0]) {
1370 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1371 return_value16[0], return_value32[0],
1372 total, current->pattern, current->input);
1373 is_successful = 0;
1374 } else
1375 #endif
1376 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1377 if (return_value == PCRE_ERROR_PARTIAL) {
1378 return_value = 2;
1379 } else {
1380 return_value *= 2;
1381 }
1382 #ifdef SUPPORT_PCRE8
1383 return_value8[0] = return_value;
1384 #endif
1385 #ifdef SUPPORT_PCRE16
1386 return_value16[0] = return_value;
1387 #endif
1388 #ifdef SUPPORT_PCRE32
1389 return_value32[0] = return_value;
1390 #endif
1391 /* Transform back the results. */
1392 if (current->flags & PCRE_UTF8) {
1393 #ifdef SUPPORT_PCRE16
1394 for (i = 0; i < return_value; ++i) {
1395 if (ovector16_1[i] >= 0)
1396 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1397 if (ovector16_2[i] >= 0)
1398 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1399 }
1400 #endif
1401 #ifdef SUPPORT_PCRE32
1402 for (i = 0; i < return_value; ++i) {
1403 if (ovector32_1[i] >= 0)
1404 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1405 if (ovector32_2[i] >= 0)
1406 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1407 }
1408 #endif
1409 }
1410
1411 for (i = 0; i < return_value; ++i) {
1412 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1413 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1414 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1415 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1416 total, current->pattern, current->input);
1417 is_successful = 0;
1418 }
1419 #endif
1420 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1421 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1422 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1423 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1424 total, current->pattern, current->input);
1425 is_successful = 0;
1426 }
1427 #endif
1428 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1429 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1430 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1431 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1432 total, current->pattern, current->input);
1433 is_successful = 0;
1434 }
1435 #endif
1436 }
1437 }
1438 } else
1439 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1440 {
1441 /* Each enabled mode (8, 16, 32 bit) is checked only against itself: its two results must be equal. */
1442 #ifdef SUPPORT_PCRE8
1443 if (return_value8[0] != return_value8[1]) {
1444 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1445 return_value8[0], return_value8[1], total, current->pattern, current->input);
1446 is_successful = 0;
1447 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1448 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1449 return_value8[0] = 2;
1450 else
1451 return_value8[0] *= 2;
1452
1453 for (i = 0; i < return_value8[0]; ++i)
1454 if (ovector8_1[i] != ovector8_2[i]) {
1455 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1456 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1457 is_successful = 0;
1458 }
1459 }
1460 #endif
1461
1462 #ifdef SUPPORT_PCRE16
1463 if (return_value16[0] != return_value16[1]) {
1464 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1465 return_value16[0], return_value16[1], total, current->pattern, current->input);
1466 is_successful = 0;
1467 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1468 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1469 return_value16[0] = 2;
1470 else
1471 return_value16[0] *= 2;
1472
1473 for (i = 0; i < return_value16[0]; ++i)
1474 if (ovector16_1[i] != ovector16_2[i]) {
1475 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1476 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1477 is_successful = 0;
1478 }
1479 }
1480 #endif
1481
1482 #ifdef SUPPORT_PCRE32
1483 if (return_value32[0] != return_value32[1]) {
1484 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1485 return_value32[0], return_value32[1], total, current->pattern, current->input);
1486 is_successful = 0;
1487 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1488 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1489 return_value32[0] = 2;
1490 else
1491 return_value32[0] *= 2;
1492
1493 for (i = 0; i < return_value32[0]; ++i)
1494 if (ovector32_1[i] != ovector32_2[i]) {
1495 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1496 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1497 is_successful = 0;
1498 }
1499 }
1500 #endif
1501 }
1502 }
1503
1504 if (is_successful) {
1505 #ifdef SUPPORT_PCRE8
1506 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1507 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1508 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1509 total, current->pattern, current->input);
1510 is_successful = 0;
1511 }
1512
1513 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1514 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1515 total, current->pattern, current->input);
1516 is_successful = 0;
1517 }
1518 }
1519 #endif
1520 #ifdef SUPPORT_PCRE16
1521 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1522 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1523 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1524 total, current->pattern, current->input);
1525 is_successful = 0;
1526 }
1527
1528 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1529 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1530 total, current->pattern, current->input);
1531 is_successful = 0;
1532 }
1533 }
1534 #endif
1535 #ifdef SUPPORT_PCRE32
1536 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1537 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1538 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1539 total, current->pattern, current->input);
1540 is_successful = 0;
1541 }
1542
1543 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1544 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1545 total, current->pattern, current->input);
1546 is_successful = 0;
1547 }
1548 }
1549 #endif
1550 }
1551
1552 if (is_successful) {
1553 #ifdef SUPPORT_PCRE8
1554 if (mark8_1 != mark8_2) {
1555 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1556 total, current->pattern, current->input);
1557 is_successful = 0;
1558 }
1559 #endif
1560 #ifdef SUPPORT_PCRE16
1561 if (mark16_1 != mark16_2) {
1562 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1563 total, current->pattern, current->input);
1564 is_successful = 0;
1565 }
1566 #endif
1567 #ifdef SUPPORT_PCRE32
1568 if (mark32_1 != mark32_2) {
1569 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1570 total, current->pattern, current->input);
1571 is_successful = 0;
1572 }
1573 #endif
1574 }
1575
1576 #ifdef SUPPORT_PCRE8
1577 if (re8) {
1578 pcre_free_study(extra8);
1579 pcre_free(re8);
1580 }
1581 #endif
1582 #ifdef SUPPORT_PCRE16
1583 if (re16) {
1584 pcre16_free_study(extra16);
1585 pcre16_free(re16);
1586 }
1587 #endif
1588 #ifdef SUPPORT_PCRE32
1589 if (re32) {
1590 pcre32_free_study(extra32);
1591 pcre32_free(re32);
1592 }
1593 #endif
1594
1595 if (is_successful) {
1596 successful++;
1597 successful_row++;
1598 printf(".");
1599 if (successful_row >= 60) {
1600 successful_row = 0;
1601 printf("\n");
1602 }
1603 } else
1604 successful_row = 0;
1605
1606 fflush(stdout);
1607 current++;
1608 }
1609 tables(1);
1610 #ifdef SUPPORT_PCRE8
1611 setstack8(NULL);
1612 #endif
1613 #ifdef SUPPORT_PCRE16
1614 setstack16(NULL);
1615 #endif
1616 #ifdef SUPPORT_PCRE32
1617 setstack32(NULL);
1618 #endif
1619
1620 if (total == successful) {
1621 printf("\nAll JIT regression tests are successfully passed.\n");
1622 return 0;
1623 } else {
1624 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1625 return 1;
1626 }
1627 }
1628
1629 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5