/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1188 - (show annotations)
Mon Oct 29 16:25:25 2012 UTC (7 years ago) by zherczeg
File MIME type: text/plain
File size: 64874 byte(s)
Fix function declaration in JIT test.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
54 #define PCRE_BUG 0x80000000
55
56 /*
57 Letter characters:
58 \xe6\x92\xad = 0x64ad = 25773 (kanji)
59 Non-letter characters:
60 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
61 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64 Newlines:
65 \xc2\x85 = 0x85 = 133 (Next Line = NEL)
66 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67 Othercase pairs:
68 \xc3\xa9 = 0xe9 = 233 (e')
69 \xc3\x89 = 0xc9 = 201 (E')
70 \xc3\xa1 = 0xe1 = 225 (a')
71 \xc3\x81 = 0xc1 = 193 (A')
72 \xc8\xba = 0x23a = 570
73 \xe2\xb1\xa5 = 0x2c65 = 11365
74 \xe1\xbd\xb8 = 0x1f78 = 8056
75 \xe1\xbf\xb8 = 0x1ff8 = 8184
76 \xf0\x90\x90\x80 = 0x10400 = 66560
77 \xf0\x90\x90\xa8 = 0x10428 = 66600
78 Mark property:
79 \xcc\x8d = 0x30d = 781
80 Special:
81 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86 */
87
/* Forward declaration: main() returns this function's result as its exit status. */
static int regression_tests(void);

/*
 * Program entry point.
 *
 * Reads the PCRE_CONFIG_JIT setting through exactly one library flavour,
 * chosen at compile time (8-bit first, then 16-bit, then 32-bit). If the
 * value is still zero afterwards, a message is printed and 1 is returned;
 * otherwise the return value of regression_tests() is passed through.
 */
int main(void)
{
	/* Stays 0 when no config call is compiled in or the call leaves it untouched. */
	int jit_enabled = 0;

#if defined SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JIT, &jit_enabled);
#elif defined SUPPORT_PCRE16
	pcre16_config(PCRE_CONFIG_JIT, &jit_enabled);
#elif defined SUPPORT_PCRE32
	pcre32_config(PCRE_CONFIG_JIT, &jit_enabled);
#endif

	if (jit_enabled)
		return regression_tests();

	printf("JIT must be enabled to run pcre_jit_test\n");
	return 1;
}
106
107 /* --------------------------------------------------------------------------------------- */
108
109 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
110 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111 #endif
112
/*
 * Shorthand PCRE option sets used by the test table.
 * Letters in the names: C = PCRE_CASELESS, M = PCRE_MULTILINE,
 * U = PCRE_UTF8, A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
 * Every set is built on MA, so all of them share multiline + ANYCRLF.
 */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(PCRE_CASELESS | MUAP)

/*
 * Layout of the second initializer of a test case: the low 16 bits
 * (OFFSET_MASK) hold the start offset, and the F_* bits above them are
 * OR'ed in as per-test flags (e.g. "0 | F_NOMATCH").
 */
#define OFFSET_MASK	0x00ffff
#define F_NO8		(1 << 16)	/* 0x010000 */
#define F_NO16		(1 << 17)	/* 0x020000 */
/* NOTE(review): same bit as F_NO16 - confirm whether the aliasing is intended. */
#define F_NO32		(1 << 17)	/* 0x020000 */
#define F_NOMATCH	(1 << 18)	/* 0x040000 */
#define F_DIFF		(1 << 19)	/* 0x080000 */
#define F_FORCECONV	(1 << 20)	/* 0x100000 */
#define F_PROPERTY	(1 << 21)	/* 0x200000 */
129
/*
 * One row of the regression test table.
 *
 * Rows are written as positional initializers, so the member order
 * below must stay exactly as it is.
 */
struct regression_test_case {
	/* PCRE option bits for this row (e.g. MUA, CMA, or raw PCRE_* values). */
	int flags;
	/* Start offset in the low bits, optionally OR'ed with F_* flag bits. */
	int start_offset;
	/* Regular expression source text. */
	const char *pattern;
	/* Input string paired with the pattern. */
	const char *input;
};
136
137 static struct regression_test_case regression_test_cases[] = {
138 /* Constant strings. */
139 { MUA, 0, "AbC", "AbAbC" },
140 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
141 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
142 { MA, 0, "[^a]", "aAbB" },
143 { CMA, 0, "[^m]", "mMnN" },
144 { MA, 0, "a[^b][^#]", "abacd" },
145 { CMA, 0, "A[^B][^E]", "abacd" },
146 { CMUA, 0, "[^x][^#]", "XxBll" },
147 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
148 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
149 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
150 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
151 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
152 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
153 { MUA, 0, "[axd]", "sAXd" },
154 { CMUA, 0, "[axd]", "sAXd" },
155 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
156 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
157 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
158 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
159 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160 { MUA, 0, "[^a]", "\xc2\x80[]" },
161 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
162 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
163 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
164 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
165 { PCRE_CASELESS, 0, "a1", "Aa1" },
166 { MA, 0, "\\Ca", "cda" },
167 { CMA, 0, "\\Ca", "CDA" },
168 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
169 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
170 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
171 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
174
175 /* Assertions. */
176 { MUA, 0, "\\b[^A]", "A_B#" },
177 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
178 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
179 { MAP, 0, "\\B", "_\xa1" },
180 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
181 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
182 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
183 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
184 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
185 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
186 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
187 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
188 { MA, 1 | F_NOMATCH, "^", "\n" },
189 { 0, 0, "^ab", "ab" },
190 { 0, 0 | F_NOMATCH, "^ab", "aab" },
191 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
192 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
193 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
194 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
196 { 0, 0, "ab$", "ab" },
197 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
198 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
199 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
200 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
201 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
202 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
203 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
204 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
205 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
206 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
207 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
208 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
209 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
210 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
211 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
212 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
213 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
214 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
218 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
219 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
220 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
224 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
225 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
226 { MA, 0, "\\Aa", "aaa" },
227 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
228 { MA, 1, "\\Ga", "aaa" },
229 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
230 { MA, 0, "a\\z", "aaa" },
231 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
232
233 /* Brackets. */
234 { MUA, 0, "(ab|bb|cd)", "bacde" },
235 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
236 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
237 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
238 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
239 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
240
241 /* Greedy and non-greedy ? operators. */
242 { MUA, 0, "(?:a)?a", "laab" },
243 { CMUA, 0, "(A)?A", "llaab" },
244 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
245 { MUA, 0, "(a)?a", "manm" },
246 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
247 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
248 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
249
250 /* Greedy and non-greedy + operators */
251 { MUA, 0, "(aa)+aa", "aaaaaaa" },
252 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
253 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
254 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
255 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
256 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
258
259 /* Greedy and non-greedy * operators */
260 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
261 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
262 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
263 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
264 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
265 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
266 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
267 { MA, 0, "((?:a|)*){0}a", "a" },
268
269 /* Combining ? + * operators */
270 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
271 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
272 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
274 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
275
276 /* Single character iterators. */
277 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
278 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
279 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
280 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
281 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
282 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
283 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
284 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
285 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
286 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
287 { MUA, 0, "(a?+[^b])+", "babaacacb" },
288 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
289 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
290 { CMUA, 0, "[c-f]+k", "DemmFke" },
291 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
292 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
293 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
294 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
295 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
296 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
297 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
298 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
299 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
300 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
301 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
302 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
303 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
304 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
305 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
306 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
307 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
309
310 /* Basic character sets. */
311 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
312 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
313 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
314 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
315 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
316 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
317
318 /* Unicode properties. */
319 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
320 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
321 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
322 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
323 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
324 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
325 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
326 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
327 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
328 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
329 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
330 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
331 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
332 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
333 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
334 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
335 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
336 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
337 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
338 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
339
340 /* Possible empty brackets. */
341 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
342 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
343 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
344 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
345 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
346 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
347 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
348 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
349 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
350 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
351
352 /* Start offset. */
353 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
354 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
355 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
356 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
357
358 /* Newline. */
359 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
360 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
361 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
362
363 /* Any character except newline or any newline. */
364 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
365 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
366 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
367 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
368 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
369 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
370 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
371 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
372 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
373 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
374 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
375 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
376 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
377 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
378 { MUA, 0, "\\R+", "ab\r\n\r" },
379 { MUA, 0, "\\R*", "ab\r\n\r" },
380 { MUA, 0, "\\R*", "\r\n\r" },
381 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
382 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
383 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
384 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
385 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
386 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
387 { MUA, 0, "\\R*\\R\\R", "\n\r" },
388 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
389 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
390
391 /* Atomic groups (no fallback from "next" direction). */
392 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
393 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
394 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
395 "bababcdedefgheijijklmlmnop" },
396 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
397 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
398 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
399 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
400 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
401 { MUA, 0, "(?>x|)*$", "aaa" },
402 { MUA, 0, "(?>(x)|)*$", "aaa" },
403 { MUA, 0, "(?>x|())*$", "aaa" },
404 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
405 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
406 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
407 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
408 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
409 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
410 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
411 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
412 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
413 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
414 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
415 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
416 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
417 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
418 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
419 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
420 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
421 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
422 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
423 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
424 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
425 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
426 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
427 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
428 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
429 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
430
431 /* Possessive quantifiers. */
432 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
433 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
434 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
435 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
436 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
437 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
438 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
439 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
440 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
441 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
442 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
443 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
444 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
445 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
446 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
447 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
448 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
449 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
450 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
451 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
452 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
453 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
454 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
455 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
456 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
457 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
458 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
459 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
460 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
461 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
462 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
463 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
464 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
465 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
466 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
467
468 /* Back references. */
469 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
470 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
471 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
472 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
473 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
474 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
475 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
476 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
477 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
478 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
479 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
480 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
481 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
482 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
483 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
484 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
485 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
486 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
487 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
488 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
489 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
490 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
491 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
492
493 /* Assertions. */
494 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
495 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
496 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
497 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
498 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
499 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
500 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
501 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
502 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
503 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
504 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
505 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
506 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
507 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
508 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
509 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
510 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
511 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
512 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
513 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
514 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
515 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
516 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
517 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
518
519 /* Not empty, ACCEPT, FAIL */
520 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
521 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
522 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
523 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
524 { MUA, 0, "a(*ACCEPT)b", "ab" },
525 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
526 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
527 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
528 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
529 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
530 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
531 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
532 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
533 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
534 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
535 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
536 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
537 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
538 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
539
540 /* Conditional blocks. */
541 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
542 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
543 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
544 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
545 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
548 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
549 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
550 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
551 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
552 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
553 { MUA, 0, "(?(?=a)ab)", "a" },
554 { MUA, 0, "(?(?<!b)c)", "b" },
555 { MUA, 0, "(?(DEFINE)a(b))", "a" },
556 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
557 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
558 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
559 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
560 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
561 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
562 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
563 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
564 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
566 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
567 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
568 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
569 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
570 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
571 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
572 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
573 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
574 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
575
576 /* Set start of match. */
577 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
578 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
579 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
580 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
581 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
582
583 /* First line. */
584 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
585 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
586 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
587 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
588 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
589 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
590 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
591 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
592 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
594 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
595 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
596 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
597 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
598 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
599 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
600 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
601 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
602 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
603
604 /* Recurse. */
605 { MUA, 0, "(a)(?1)", "aa" },
606 { MUA, 0, "((a))(?1)", "aa" },
607 { MUA, 0, "(b|a)(?1)", "aa" },
608 { MUA, 0, "(b|(a))(?1)", "aa" },
609 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
610 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
611 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
612 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
613 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
614 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
615 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
616 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
617 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
618 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
619 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
620 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
621 { MUA, 0, "b|<(?R)*>", "<<b>" },
622 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
623 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
624 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
625 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
626 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
627 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
628 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
629 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
630
631 /* 16 bit specific tests. */
632 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
633 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
634 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
635 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
636 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
637 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
638 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
639 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
640 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
641 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
642 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
643 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
644 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
645 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
646 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
647 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
648 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
649 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
650 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
651 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
652 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
653 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
654 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
655 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
656 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
657 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
658 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
659 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
660 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
661 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
662 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
663
664 /* Partial matching. */
665 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
666 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
667 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
668 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
669 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
670 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
671 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
672 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
673
674 /* (*MARK) verb. */
675 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
676 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
677 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
678 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
679 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
680 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
681 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
682 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
683 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
684 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
685 { MUA, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
686 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
687 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
688 { MUA, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
689
690 /* (*COMMIT) verb. */
691 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
692 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
693 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
694 { MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" },
695 { MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" },
696
697 /* Deep recursion. */
698 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
699 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
700 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
701
702 /* Deep recursion: Stack limit reached. */
703 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
704 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
705 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
706 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
707 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
708
709 { 0, 0, NULL, NULL }
710 };
711
/* Returns a private heap copy of PCRE's default character tables.

   mode == 0: return the cached copy, creating it on the first call
              (NULL if the tables cannot be obtained or allocated).
   mode != 0: release the cached copy and return NULL.

   Handing PCRE a malloc'ed copy instead of its built-in tables lets
   valgrind report invalid reads and writes on them. */
static const unsigned char *tables(int mode)
{
	/* The size of the table block is not available through
	   pcre_fullinfo, so it is hard-coded here. That is acceptable
	   for a test program. */
	enum { TABLES_SIZE = 1088 };
	static unsigned char *tables_cache;
	const char *errorptr;
	int erroroffset;
	unsigned char *builtin_tables = NULL;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode != 0) {
		/* Cleanup request; free(NULL) is a no-op. */
		free(tables_cache);
		tables_cache = NULL;
		return NULL;
	}

	if (tables_cache != NULL)
		return tables_cache;

	/* Compile an empty pattern only to ask the library where its
	   default tables are. */
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &builtin_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &builtin_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &builtin_tables);
		pcre32_free(regex);
	}
#endif
	/* Shouldn't ever happen. */
	if (builtin_tables == NULL)
		return NULL;

	tables_cache = (unsigned char *)malloc(TABLES_SIZE);
	if (tables_cache != NULL)
		memcpy(tables_cache, builtin_tables, TABLES_SIZE);
	return tables_cache;
}
774
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the user data pointer is
   the stack itself, so it is simply handed back. */
static pcre_jit_stack *callback8(void *arg)
{
	pcre_jit_stack *stack = (pcre_jit_stack *)arg;

	return stack;
}
#endif
781
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the user data pointer is
   the stack itself, so it is simply handed back. */
static pcre16_jit_stack *callback16(void *arg)
{
	pcre16_jit_stack *stack = (pcre16_jit_stack *)arg;

	return stack;
}
#endif
788
#ifdef SUPPORT_PCRE32
/* JIT stack callback for the 32 bit library: the user data pointer is
   the stack itself, so it is simply handed back. */
static pcre32_jit_stack *callback32(void *arg)
{
	pcre32_jit_stack *stack = (pcre32_jit_stack *)arg;

	return stack;
}
#endif
795
#ifdef SUPPORT_PCRE8
/* JIT stack shared by the 8 bit tests; NULL until first requested. */
static pcre_jit_stack *stack8;

/* Returns the shared 8 bit JIT stack, allocating it on first use with
   pcre_jit_stack_alloc(1, 1024 * 1024). The allocation result is
   returned as is, so it may be NULL. */
static pcre_jit_stack *getstack8(void)
{
	if (stack8 == NULL)
		stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
	return stack8;
}

/* With a non-NULL extra: registers callback8 on it, passing the shared
   stack as the callback argument.
   With a NULL extra: frees the shared stack (if any) and forgets it. */
static void setstack8(pcre_extra *extra)
{
	if (extra != NULL) {
		pcre_assign_jit_stack(extra, callback8, getstack8());
		return;
	}

	if (stack8 != NULL)
		pcre_jit_stack_free(stack8);
	stack8 = NULL;
}
#endif /* SUPPORT_PCRE8 */
818
#ifdef SUPPORT_PCRE16
/* JIT stack shared by the 16 bit tests; NULL until first requested. */
static pcre16_jit_stack *stack16;

/* Returns the shared 16 bit JIT stack, allocating it on first use with
   pcre16_jit_stack_alloc(1, 1024 * 1024). The allocation result is
   returned as is, so it may be NULL. */
static pcre16_jit_stack *getstack16(void)
{
	if (stack16 == NULL)
		stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
	return stack16;
}

/* With a non-NULL extra: registers callback16 on it, passing the shared
   stack as the callback argument.
   With a NULL extra: frees the shared stack (if any) and forgets it. */
static void setstack16(pcre16_extra *extra)
{
	if (extra != NULL) {
		pcre16_assign_jit_stack(extra, callback16, getstack16());
		return;
	}

	if (stack16 != NULL)
		pcre16_jit_stack_free(stack16);
	stack16 = NULL;
}
#endif /* SUPPORT_PCRE16 */
841
#ifdef SUPPORT_PCRE32
/* JIT stack shared by the 32 bit tests; NULL until first requested. */
static pcre32_jit_stack *stack32;

/* Returns the shared 32 bit JIT stack, allocating it on first use with
   pcre32_jit_stack_alloc(1, 1024 * 1024). The allocation result is
   returned as is, so it may be NULL. */
static pcre32_jit_stack *getstack32(void)
{
	if (stack32 == NULL)
		stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
	return stack32;
}

/* With a non-NULL extra: registers callback32 on it, passing the shared
   stack as the callback argument.
   With a NULL extra: frees the shared stack (if any) and forgets it. */
static void setstack32(pcre32_extra *extra)
{
	if (extra != NULL) {
		pcre32_assign_jit_stack(extra, callback32, getstack32());
		return;
	}

	if (stack32 != NULL)
		pcre32_jit_stack_free(stack32);
	stack32 = NULL;
}
#endif /* SUPPORT_PCRE32 */
864
865 #ifdef SUPPORT_PCRE16
866
867 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
868 {
869 unsigned char *iptr = (unsigned char*)input;
870 PCRE_UCHAR16 *optr = output;
871 unsigned int c;
872
873 if (max_length == 0)
874 return 0;
875
876 while (*iptr && max_length > 1) {
877 c = 0;
878 if (offsetmap)
879 *offsetmap++ = (int)(iptr - (unsigned char*)input);
880
881 if (!(*iptr & 0x80))
882 c = *iptr++;
883 else if (!(*iptr & 0x20)) {
884 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
885 iptr += 2;
886 } else if (!(*iptr & 0x10)) {
887 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
888 iptr += 3;
889 } else if (!(*iptr & 0x08)) {
890 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
891 iptr += 4;
892 }
893
894 if (c < 65536) {
895 *optr++ = c;
896 max_length--;
897 } else if (max_length <= 2) {
898 *optr = '\0';
899 return (int)(optr - output);
900 } else {
901 c -= 0x10000;
902 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
903 *optr++ = 0xdc00 | (c & 0x3ff);
904 max_length -= 2;
905 if (offsetmap)
906 offsetmap++;
907 }
908 }
909 if (offsetmap)
910 *offsetmap = (int)(iptr - (unsigned char*)input);
911 *optr = '\0';
912 return (int)(optr - output);
913 }
914
915 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
916 {
917 unsigned char *iptr = (unsigned char*)input;
918 PCRE_UCHAR16 *optr = output;
919
920 if (max_length == 0)
921 return 0;
922
923 while (*iptr && max_length > 1) {
924 *optr++ = *iptr++;
925 max_length--;
926 }
927 *optr = '\0';
928 return (int)(optr - output);
929 }
930
931 #define REGTEST_MAX_LENGTH16 4096
932 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
933 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
934
935 #endif /* SUPPORT_PCRE16 */
936
937 #ifdef SUPPORT_PCRE32
938
939 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
940 {
941 unsigned char *iptr = (unsigned char*)input;
942 PCRE_UCHAR32 *optr = output;
943 unsigned int c;
944
945 if (max_length == 0)
946 return 0;
947
948 while (*iptr && max_length > 1) {
949 c = 0;
950 if (offsetmap)
951 *offsetmap++ = (int)(iptr - (unsigned char*)input);
952
953 if (!(*iptr & 0x80))
954 c = *iptr++;
955 else if (!(*iptr & 0x20)) {
956 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
957 iptr += 2;
958 } else if (!(*iptr & 0x10)) {
959 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
960 iptr += 3;
961 } else if (!(*iptr & 0x08)) {
962 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
963 iptr += 4;
964 }
965
966 *optr++ = c;
967 max_length--;
968 }
969 if (offsetmap)
970 *offsetmap = (int)(iptr - (unsigned char*)input);
971 *optr = 0;
972 return (int)(optr - output);
973 }
974
975 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
976 {
977 unsigned char *iptr = (unsigned char*)input;
978 PCRE_UCHAR32 *optr = output;
979
980 if (max_length == 0)
981 return 0;
982
983 while (*iptr && max_length > 1) {
984 *optr++ = *iptr++;
985 max_length--;
986 }
987 *optr = '\0';
988 return (int)(optr - output);
989 }
990
991 #define REGTEST_MAX_LENGTH32 4096
992 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
993 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
994
995 #endif /* SUPPORT_PCRE32 */
996
/* Returns 1 when every byte of the NUL-terminated string input is in
   the range 0..127, and 0 as soon as a larger byte value is found.
   An empty string yields 1. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 128)
			return 0;
	}
	return 1;
}
1007
1008 static int regression_tests(void)
1009 {
1010 struct regression_test_case *current = regression_test_cases;
1011 const char *error;
1012 char *cpu_info;
1013 int i, err_offs;
1014 int is_successful, is_ascii_pattern, is_ascii_input;
1015 int total = 0;
1016 int successful = 0;
1017 int successful_row = 0;
1018 int counter = 0;
1019 int study_mode;
1020 int utf = 0, ucp = 0;
1021 int disabled_flags = 0;
1022 #ifdef SUPPORT_PCRE8
1023 pcre *re8;
1024 pcre_extra *extra8;
1025 pcre_extra dummy_extra8;
1026 int ovector8_1[32];
1027 int ovector8_2[32];
1028 int return_value8[2];
1029 unsigned char *mark8_1, *mark8_2;
1030 #endif
1031 #ifdef SUPPORT_PCRE16
1032 pcre16 *re16;
1033 pcre16_extra *extra16;
1034 pcre16_extra dummy_extra16;
1035 int ovector16_1[32];
1036 int ovector16_2[32];
1037 int return_value16[2];
1038 PCRE_UCHAR16 *mark16_1, *mark16_2;
1039 int length16;
1040 #endif
1041 #ifdef SUPPORT_PCRE32
1042 pcre32 *re32;
1043 pcre32_extra *extra32;
1044 pcre32_extra dummy_extra32;
1045 int ovector32_1[32];
1046 int ovector32_2[32];
1047 int return_value32[2];
1048 PCRE_UCHAR32 *mark32_1, *mark32_2;
1049 int length32;
1050 #endif
1051
1052 /* This test compares the behaviour of interpreter and JIT. Although disabling
1053 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1054 still considered successful from pcre_jit_test point of view. */
1055
1056 #if defined SUPPORT_PCRE8
1057 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1058 #elif defined SUPPORT_PCRE16
1059 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1060 #elif defined SUPPORT_PCRE32
1061 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1062 #endif
1063
1064 printf("Running JIT regression tests\n");
1065 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1066
1067 #if defined SUPPORT_PCRE8
1068 pcre_config(PCRE_CONFIG_UTF8, &utf);
1069 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1070 #elif defined SUPPORT_PCRE16
1071 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1072 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1073 #elif defined SUPPORT_PCRE16
1074 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1075 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1076 #endif
1077
1078 if (!utf)
1079 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1080 if (!ucp)
1081 disabled_flags |= PCRE_UCP;
1082 #ifdef SUPPORT_PCRE8
1083 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1084 #endif
1085 #ifdef SUPPORT_PCRE16
1086 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1087 #endif
1088 #ifdef SUPPORT_PCRE32
1089 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1090 #endif
1091
1092 while (current->pattern) {
1093 /* printf("\nPattern: %s :\n", current->pattern); */
1094 total++;
1095 if (current->start_offset & F_PROPERTY) {
1096 is_ascii_pattern = 0;
1097 is_ascii_input = 0;
1098 } else {
1099 is_ascii_pattern = check_ascii(current->pattern);
1100 is_ascii_input = check_ascii(current->input);
1101 }
1102
1103 if (current->flags & PCRE_PARTIAL_SOFT)
1104 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1105 else if (current->flags & PCRE_PARTIAL_HARD)
1106 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1107 else
1108 study_mode = PCRE_STUDY_JIT_COMPILE;
1109 error = NULL;
1110 #ifdef SUPPORT_PCRE8
1111 re8 = NULL;
1112 if (!(current->start_offset & F_NO8))
1113 re8 = pcre_compile(current->pattern,
1114 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1115 &error, &err_offs, tables(0));
1116
1117 extra8 = NULL;
1118 if (re8) {
1119 error = NULL;
1120 extra8 = pcre_study(re8, study_mode, &error);
1121 if (!extra8) {
1122 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1123 pcre_free(re8);
1124 re8 = NULL;
1125 }
1126 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1127 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1128 pcre_free_study(extra8);
1129 pcre_free(re8);
1130 re8 = NULL;
1131 }
1132 extra8->flags |= PCRE_EXTRA_MARK;
1133 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1134 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1135 #endif
1136 #ifdef SUPPORT_PCRE16
1137 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1138 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1139 else
1140 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1141
1142 re16 = NULL;
1143 if (!(current->start_offset & F_NO16))
1144 re16 = pcre16_compile(regtest_buf16,
1145 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1146 &error, &err_offs, tables(0));
1147
1148 extra16 = NULL;
1149 if (re16) {
1150 error = NULL;
1151 extra16 = pcre16_study(re16, study_mode, &error);
1152 if (!extra16) {
1153 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1154 pcre16_free(re16);
1155 re16 = NULL;
1156 }
1157 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1158 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1159 pcre16_free_study(extra16);
1160 pcre16_free(re16);
1161 re16 = NULL;
1162 }
1163 extra16->flags |= PCRE_EXTRA_MARK;
1164 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1165 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1166 #endif
1167 #ifdef SUPPORT_PCRE32
1168 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1169 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1170 else
1171 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1172
1173 re32 = NULL;
1174 if (!(current->start_offset & F_NO32))
1175 re32 = pcre32_compile(regtest_buf32,
1176 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1177 &error, &err_offs, tables(0));
1178
1179 extra32 = NULL;
1180 if (re32) {
1181 error = NULL;
1182 extra32 = pcre32_study(re32, study_mode, &error);
1183 if (!extra32) {
1184 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1185 pcre32_free(re32);
1186 re32 = NULL;
1187 }
1188 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1189 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1190 pcre32_free_study(extra32);
1191 pcre32_free(re32);
1192 re32 = NULL;
1193 }
1194 extra32->flags |= PCRE_EXTRA_MARK;
1195 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1196 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1197 #endif
1198
1199 counter++;
1200 if ((counter & 0x3) != 0) {
1201 #ifdef SUPPORT_PCRE8
1202 setstack8(NULL);
1203 #endif
1204 #ifdef SUPPORT_PCRE16
1205 setstack16(NULL);
1206 #endif
1207 #ifdef SUPPORT_PCRE32
1208 setstack32(NULL);
1209 #endif
1210 }
1211
1212 #ifdef SUPPORT_PCRE8
1213 return_value8[0] = -1000;
1214 return_value8[1] = -1000;
1215 for (i = 0; i < 32; ++i)
1216 ovector8_1[i] = -2;
1217 for (i = 0; i < 32; ++i)
1218 ovector8_2[i] = -2;
1219 if (re8) {
1220 mark8_1 = NULL;
1221 mark8_2 = NULL;
1222 extra8->mark = &mark8_1;
1223
1224 if ((counter & 0x1) != 0) {
1225 setstack8(extra8);
1226 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1227 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1228 } else
1229 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1230 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
1231 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1232 dummy_extra8.flags = PCRE_EXTRA_MARK;
1233 dummy_extra8.mark = &mark8_2;
1234 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1235 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1236 }
1237 #endif
1238
1239 #ifdef SUPPORT_PCRE16
1240 return_value16[0] = -1000;
1241 return_value16[1] = -1000;
1242 for (i = 0; i < 32; ++i)
1243 ovector16_1[i] = -2;
1244 for (i = 0; i < 32; ++i)
1245 ovector16_2[i] = -2;
1246 if (re16) {
1247 mark16_1 = NULL;
1248 mark16_2 = NULL;
1249 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1250 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1251 else
1252 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1253 extra16->mark = &mark16_1;
1254 if ((counter & 0x1) != 0) {
1255 setstack16(extra16);
1256 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1257 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1258 } else
1259 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1260 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
1261 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1262 dummy_extra16.flags = PCRE_EXTRA_MARK;
1263 dummy_extra16.mark = &mark16_2;
1264 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1265 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1266 }
1267 #endif
1268
1269 #ifdef SUPPORT_PCRE32
1270 return_value32[0] = -1000;
1271 return_value32[1] = -1000;
1272 for (i = 0; i < 32; ++i)
1273 ovector32_1[i] = -2;
1274 for (i = 0; i < 32; ++i)
1275 ovector32_2[i] = -2;
1276 if (re32) {
1277 mark32_1 = NULL;
1278 mark32_2 = NULL;
1279 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1280 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1281 else
1282 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1283 extra32->mark = &mark32_1;
1284 if ((counter & 0x1) != 0) {
1285 setstack32(extra32);
1286 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1287 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1288 } else
1289 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1290 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
1291 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1292 dummy_extra32.flags = PCRE_EXTRA_MARK;
1293 dummy_extra32.mark = &mark32_2;
1294 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1295 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1296 }
1297 #endif
1298
1299 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1300 return_value8[0], return_value16[0],
1301 ovector8_1[0], ovector8_1[1],
1302 ovector16_1[0], ovector16_1[1],
1303 ovector32_1[0], ovector32_1[1],
1304 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1305
1306 /* If F_DIFF is set, just run the test, but do not compare the results.
1307 Segfaults can still be captured. */
1308
1309 is_successful = 1;
1310 if (!(current->start_offset & F_DIFF)) {
1311 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1312 if (!(current->start_offset & F_FORCECONV)) {
1313 int return_value;
1314
1315 /* All results must be the same. */
1316 #ifdef SUPPORT_PCRE8
1317 if ((return_value = return_value8[0]) != return_value8[1]) {
1318 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1319 return_value8[0], return_value8[1], total, current->pattern, current->input);
1320 is_successful = 0;
1321 } else
1322 #endif
1323 #ifdef SUPPORT_PCRE16
1324 if ((return_value = return_value16[0]) != return_value16[1]) {
1325 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1326 return_value16[0], return_value16[1], total, current->pattern, current->input);
1327 is_successful = 0;
1328 } else
1329 #endif
1330 #ifdef SUPPORT_PCRE32
1331 if ((return_value = return_value32[0]) != return_value32[1]) {
1332 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1333 return_value32[0], return_value32[1], total, current->pattern, current->input);
1334 is_successful = 0;
1335 } else
1336 #endif
1337 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1338 if (return_value8[0] != return_value16[0]) {
1339 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1340 return_value8[0], return_value16[0],
1341 total, current->pattern, current->input);
1342 is_successful = 0;
1343 } else
1344 #endif
1345 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1346 if (return_value8[0] != return_value32[0]) {
1347 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1348 return_value8[0], return_value32[0],
1349 total, current->pattern, current->input);
1350 is_successful = 0;
1351 } else
1352 #endif
1353 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1354 if (return_value16[0] != return_value32[0]) {
1355 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1356 return_value16[0], return_value32[0],
1357 total, current->pattern, current->input);
1358 is_successful = 0;
1359 } else
1360 #endif
1361 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1362 if (return_value == PCRE_ERROR_PARTIAL) {
1363 return_value = 2;
1364 } else {
1365 return_value *= 2;
1366 }
1367 #ifdef SUPPORT_PCRE8
1368 return_value8[0] = return_value;
1369 #endif
1370 #ifdef SUPPORT_PCRE16
1371 return_value16[0] = return_value;
1372 #endif
1373 #ifdef SUPPORT_PCRE32
1374 return_value32[0] = return_value;
1375 #endif
1376 /* Transform back the results. */
1377 if (current->flags & PCRE_UTF8) {
1378 #ifdef SUPPORT_PCRE16
1379 for (i = 0; i < return_value; ++i) {
1380 if (ovector16_1[i] >= 0)
1381 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1382 if (ovector16_2[i] >= 0)
1383 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1384 }
1385 #endif
1386 #ifdef SUPPORT_PCRE32
1387 for (i = 0; i < return_value; ++i) {
1388 if (ovector32_1[i] >= 0)
1389 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1390 if (ovector32_2[i] >= 0)
1391 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1392 }
1393 #endif
1394 }
1395
1396 for (i = 0; i < return_value; ++i) {
1397 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1398 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1399 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1400 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1401 total, current->pattern, current->input);
1402 is_successful = 0;
1403 }
1404 #endif
1405 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1406 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1407 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1408 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1409 total, current->pattern, current->input);
1410 is_successful = 0;
1411 }
1412 #endif
1413 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1414 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1415 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1416 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1417 total, current->pattern, current->input);
1418 is_successful = 0;
1419 }
1420 #endif
1421 }
1422 }
1423 } else
1424 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1425 {
1426 /* Only the 8 bit and 16 bit results must be equal. */
1427 #ifdef SUPPORT_PCRE8
1428 if (return_value8[0] != return_value8[1]) {
1429 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1430 return_value8[0], return_value8[1], total, current->pattern, current->input);
1431 is_successful = 0;
1432 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1433 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1434 return_value8[0] = 2;
1435 else
1436 return_value8[0] *= 2;
1437
1438 for (i = 0; i < return_value8[0]; ++i)
1439 if (ovector8_1[i] != ovector8_2[i]) {
1440 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1441 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1442 is_successful = 0;
1443 }
1444 }
1445 #endif
1446
1447 #ifdef SUPPORT_PCRE16
1448 if (return_value16[0] != return_value16[1]) {
1449 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1450 return_value16[0], return_value16[1], total, current->pattern, current->input);
1451 is_successful = 0;
1452 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1453 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1454 return_value16[0] = 2;
1455 else
1456 return_value16[0] *= 2;
1457
1458 for (i = 0; i < return_value16[0]; ++i)
1459 if (ovector16_1[i] != ovector16_2[i]) {
1460 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1461 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1462 is_successful = 0;
1463 }
1464 }
1465 #endif
1466
1467 #ifdef SUPPORT_PCRE32
1468 if (return_value32[0] != return_value32[1]) {
1469 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1470 return_value32[0], return_value32[1], total, current->pattern, current->input);
1471 is_successful = 0;
1472 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1473 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1474 return_value32[0] = 2;
1475 else
1476 return_value32[0] *= 2;
1477
1478 for (i = 0; i < return_value32[0]; ++i)
1479 if (ovector32_1[i] != ovector32_2[i]) {
1480 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1481 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1482 is_successful = 0;
1483 }
1484 }
1485 #endif
1486 }
1487 }
1488
1489 if (is_successful) {
1490 #ifdef SUPPORT_PCRE8
1491 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1492 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1493 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1494 total, current->pattern, current->input);
1495 is_successful = 0;
1496 }
1497
1498 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1499 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1500 total, current->pattern, current->input);
1501 is_successful = 0;
1502 }
1503 }
1504 #endif
1505 #ifdef SUPPORT_PCRE16
1506 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1507 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1508 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1509 total, current->pattern, current->input);
1510 is_successful = 0;
1511 }
1512
1513 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1514 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1515 total, current->pattern, current->input);
1516 is_successful = 0;
1517 }
1518 }
1519 #endif
1520 #ifdef SUPPORT_PCRE32
1521 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1522 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1523 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1524 total, current->pattern, current->input);
1525 is_successful = 0;
1526 }
1527
1528 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1529 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1530 total, current->pattern, current->input);
1531 is_successful = 0;
1532 }
1533 }
1534 #endif
1535 }
1536
1537 if (is_successful) {
1538 #ifdef SUPPORT_PCRE8
1539 if (mark8_1 != mark8_2) {
1540 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1541 total, current->pattern, current->input);
1542 is_successful = 0;
1543 }
1544 #endif
1545 #ifdef SUPPORT_PCRE16
1546 if (mark16_1 != mark16_2) {
1547 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1548 total, current->pattern, current->input);
1549 is_successful = 0;
1550 }
1551 #endif
1552 #ifdef SUPPORT_PCRE32
1553 if (mark32_1 != mark32_2) {
1554 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1555 total, current->pattern, current->input);
1556 is_successful = 0;
1557 }
1558 #endif
1559 }
1560
1561 #ifdef SUPPORT_PCRE8
1562 if (re8) {
1563 pcre_free_study(extra8);
1564 pcre_free(re8);
1565 }
1566 #endif
1567 #ifdef SUPPORT_PCRE16
1568 if (re16) {
1569 pcre16_free_study(extra16);
1570 pcre16_free(re16);
1571 }
1572 #endif
1573 #ifdef SUPPORT_PCRE32
1574 if (re32) {
1575 pcre32_free_study(extra32);
1576 pcre32_free(re32);
1577 }
1578 #endif
1579
1580 if (is_successful) {
1581 successful++;
1582 successful_row++;
1583 printf(".");
1584 if (successful_row >= 60) {
1585 successful_row = 0;
1586 printf("\n");
1587 }
1588 } else
1589 successful_row = 0;
1590
1591 fflush(stdout);
1592 current++;
1593 }
1594 tables(1);
1595 #ifdef SUPPORT_PCRE8
1596 setstack8(NULL);
1597 #endif
1598 #ifdef SUPPORT_PCRE16
1599 setstack16(NULL);
1600 #endif
1601 #ifdef SUPPORT_PCRE32
1602 setstack32(NULL);
1603 #endif
1604
1605 if (total == successful) {
1606 printf("\nAll JIT regression tests are successfully passed.\n");
1607 return 0;
1608 } else {
1609 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1610 return 1;
1611 }
1612 }
1613
1614 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5