/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1272 - (show annotations)
Thu Mar 7 11:30:01 2013 UTC (6 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 65777 byte(s)
(*PRUNE) is now supported by the JIT compiler.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
54 #define PCRE_BUG 0x80000000
55
56 /*
57 Letter characters:
58 \xe6\x92\xad = 0x64ad = 25773 (kanji)
59 Non-letter characters:
60 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
61 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64 Newlines:
65 \xc2\x85 = 0x85 = 133 (Next Line = NEL)
66 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67 Othercase pairs:
68 \xc3\xa9 = 0xe9 = 233 (e')
69 \xc3\x89 = 0xc9 = 201 (E')
70 \xc3\xa1 = 0xe1 = 225 (a')
71 \xc3\x81 = 0xc1 = 193 (A')
72 \xc8\xba = 0x23a = 570
73 \xe2\xb1\xa5 = 0x2c65 = 11365
74 \xe1\xbd\xb8 = 0x1f78 = 8056
75 \xe1\xbf\xb8 = 0x1ff8 = 8184
76 \xf0\x90\x90\x80 = 0x10400 = 66560
77 \xf0\x90\x90\xa8 = 0x10428 = 66600
78 Mark property:
79 \xcc\x8d = 0x30d = 781
80 Special:
81 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86 */
87
88 static int regression_tests(void);
89
/* Program entry point.

Queries the PCRE library for PCRE_CONFIG_JIT through the 8-bit API when
SUPPORT_PCRE8 is defined, otherwise through the 16-bit API, otherwise through
the 32-bit API. When the library reports no JIT, a message is printed and the
exit status is 1; otherwise the exit status is whatever regression_tests()
returns. */
int main(void)
{
  int jit_available = 0;

#if defined SUPPORT_PCRE8
  pcre_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE16
  pcre16_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE32
  pcre32_config(PCRE_CONFIG_JIT, &jit_available);
#endif

  /* Only run the regression suite when the JIT compiler is present. */
  if (jit_available)
    return regression_tests();

  printf("JIT must be enabled to run pcre_jit_test\n");
  return 1;
}
106
107 /* --------------------------------------------------------------------------------------- */
108
109 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
110 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111 #endif
112
/* Shorthand PCRE option sets used as the first field of each test case.
The letters in each name stand for the options it combines:
  C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
  A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
#define MA    (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP   (MA | PCRE_UCP)
#define CMA   (PCRE_CASELESS | MA)
#define MUA   (MA | PCRE_UTF8)
#define MUAP  (MUA | PCRE_UCP)
#define CMUA  (PCRE_CASELESS | MUA)
#define CMUAP (CMUA | PCRE_UCP)

/* Bits OR'd into the second field (start_offset) of a test case, e.g.
`0 | F_NOMATCH`. OFFSET_MASK covers the low 16 bits; every F_* flag sits
above them.
NOTE(review): F_NO16 and F_NO32 have the same value, so the two cannot be
told apart by a bit test - confirm this is intentional. */
#define OFFSET_MASK ((1 << 16) - 1)
#define F_NO8       (1 << 16)
#define F_NO16      (1 << 17)
#define F_NO32      (1 << 17)
#define F_NOMATCH   (1 << 18)
#define F_DIFF      (1 << 19)
#define F_FORCECONV (1 << 20)
#define F_PROPERTY  (1 << 21)
#define F_STUDY     (1 << 22)
130
/* One row of the regression test table. */
struct regression_test_case {
  /* PCRE_* option bits for this case (often one of the MUA/CMA/... sets). */
  int flags;
  /* Numeric start offset, with F_* flag bits optionally OR'd in
     (rows look like `1 | F_NOMATCH`). */
  int start_offset;
  /* Regular expression source text. */
  const char *pattern;
  /* Subject string the pattern is run against. */
  const char *input;
};
137
138 static struct regression_test_case regression_test_cases[] = {
139 /* Constant strings. */
140 { MUA, 0, "AbC", "AbAbC" },
141 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
142 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
143 { MA, 0, "[^a]", "aAbB" },
144 { CMA, 0, "[^m]", "mMnN" },
145 { MA, 0, "a[^b][^#]", "abacd" },
146 { CMA, 0, "A[^B][^E]", "abacd" },
147 { CMUA, 0, "[^x][^#]", "XxBll" },
148 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
149 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
150 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
151 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
152 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
153 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
154 { MUA, 0, "[axd]", "sAXd" },
155 { CMUA, 0, "[axd]", "sAXd" },
156 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
157 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
158 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
159 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
161 { MUA, 0, "[^a]", "\xc2\x80[]" },
162 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
163 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
164 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
165 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
166 { PCRE_CASELESS, 0, "a1", "Aa1" },
167 { MA, 0, "\\Ca", "cda" },
168 { CMA, 0, "\\Ca", "CDA" },
169 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
170 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
171 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
174 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
175
176 /* Assertions. */
177 { MUA, 0, "\\b[^A]", "A_B#" },
178 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
179 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
180 { MAP, 0, "\\B", "_\xa1" },
181 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
182 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
183 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
184 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
185 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
186 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
187 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
188 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
189 { MA, 1 | F_NOMATCH, "^", "\n" },
190 { 0, 0, "^ab", "ab" },
191 { 0, 0 | F_NOMATCH, "^ab", "aab" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
197 { 0, 0, "ab$", "ab" },
198 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
199 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
200 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
201 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
202 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
203 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
204 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
205 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
206 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
207 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
208 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
209 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
210 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
211 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
212 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
213 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
214 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
215 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
218 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
219 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
220 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
225 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
226 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
227 { MA, 0, "\\Aa", "aaa" },
228 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
229 { MA, 1, "\\Ga", "aaa" },
230 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
231 { MA, 0, "a\\z", "aaa" },
232 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
233
234 /* Brackets. */
235 { MUA, 0, "(ab|bb|cd)", "bacde" },
236 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
237 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
238 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
239 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
240 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
241
242 /* Greedy and non-greedy ? operators. */
243 { MUA, 0, "(?:a)?a", "laab" },
244 { CMUA, 0, "(A)?A", "llaab" },
245 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
246 { MUA, 0, "(a)?a", "manm" },
247 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
248 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
249 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
250
251 /* Greedy and non-greedy + operators */
252 { MUA, 0, "(aa)+aa", "aaaaaaa" },
253 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
254 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
255 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
256 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
258 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
259
260 /* Greedy and non-greedy * operators */
261 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
262 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
263 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
264 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
265 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
266 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
267 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
268 { MA, 0, "((?:a|)*){0}a", "a" },
269
270 /* Combining ? + * operators */
271 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
272 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
274 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
275 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
276
277 /* Single character iterators. */
278 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
279 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
280 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
281 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
282 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
283 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
284 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
285 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
286 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
287 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
288 { MUA, 0, "(a?+[^b])+", "babaacacb" },
289 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
290 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
291 { CMUA, 0, "[c-f]+k", "DemmFke" },
292 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
293 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
294 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
295 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
296 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
297 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
298 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
299 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
300 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
301 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
302 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
303 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
304 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
305 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
306 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
307 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
309 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
310
311 /* Basic character sets. */
312 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
313 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
314 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
315 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
316 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
317 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
318
319 /* Unicode properties. */
320 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
321 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
322 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
323 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
324 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
325 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
326 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
327 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
328 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
329 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
330 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
331 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
332 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
333 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
334 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
335 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
336 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
337 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
338 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
339 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
340
341 /* Possible empty brackets. */
342 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
343 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
344 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
345 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
346 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
347 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
348 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
349 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
350 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
351 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
352
353 /* Start offset. */
354 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
355 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
356 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
357 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
358
359 /* Newline. */
360 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
361 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
362 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
363
364 /* Any character except newline or any newline. */
365 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
366 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
367 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
368 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
369 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
370 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
371 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
372 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
373 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
374 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
375 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
376 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
377 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
378 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
379 { MUA, 0, "\\R+", "ab\r\n\r" },
380 { MUA, 0, "\\R*", "ab\r\n\r" },
381 { MUA, 0, "\\R*", "\r\n\r" },
382 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
383 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
384 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
385 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
386 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
387 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
388 { MUA, 0, "\\R*\\R\\R", "\n\r" },
389 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
390 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
391
392 /* Atomic groups (no fallback from "next" direction). */
393 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
394 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
395 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
396 "bababcdedefgheijijklmlmnop" },
397 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
398 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
399 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
400 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
401 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
402 { MUA, 0, "(?>x|)*$", "aaa" },
403 { MUA, 0, "(?>(x)|)*$", "aaa" },
404 { MUA, 0, "(?>x|())*$", "aaa" },
405 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
406 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
407 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
408 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
409 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
410 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
411 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
412 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
413 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
414 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
415 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
416 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
417 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
418 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
419 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
420 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
421 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
422 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
423 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
424 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
425 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
426 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
427 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
428 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
429 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
430 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
431
432 /* Possessive quantifiers. */
433 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
434 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
435 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
436 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
437 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
438 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
439 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
440 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
441 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
442 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
443 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
444 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
445 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
446 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
447 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
448 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
449 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
450 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
451 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
452 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
453 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
454 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
455 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
456 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
457 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
458 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
459 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
460 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
461 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
462 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
463 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
464 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
465 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
466 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
467 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
468
469 /* Back references. */
470 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
471 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
472 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
473 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
474 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
475 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
476 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
477 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
478 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
479 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
480 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
481 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
482 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
483 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
484 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
485 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
486 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
487 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
488 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
489 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
490 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
491 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
492 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
493
494 /* Assertions. */
495 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
496 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
497 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
498 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
499 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
500 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
501 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
502 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
503 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
504 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
505 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
506 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
507 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
508 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
509 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
510 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
511 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
512 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
513 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
514 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
515 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
516 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
517 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
518 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
519
520 /* Not empty, ACCEPT, FAIL */
521 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
522 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
523 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
524 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
525 { MUA, 0, "a(*ACCEPT)b", "ab" },
526 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
527 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
528 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
529 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
530 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
531 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
532 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
533 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
534 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
535 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
536 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
537 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
538 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
539 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
540
541 /* Conditional blocks. */
542 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
543 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
544 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
545 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
548 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
549 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
550 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
551 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
552 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
553 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
554 { MUA, 0, "(?(?=a)ab)", "a" },
555 { MUA, 0, "(?(?<!b)c)", "b" },
556 { MUA, 0, "(?(DEFINE)a(b))", "a" },
557 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
558 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
559 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
560 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
561 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
562 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
563 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
564 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
566 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
567 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
568 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
569 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
570 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
571 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
572 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
573 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
574 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
575 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
576 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
577
578 /* Set start of match. */
579 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
580 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
581 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
582 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
583 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
584
585 /* First line. */
586 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
587 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
588 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
589 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
590 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
591 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
592 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
593 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
594 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
595 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
596 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
597 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
598 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
599 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
600 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
601 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
602 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
603 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
604 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
605
606 /* Recurse. */
607 { MUA, 0, "(a)(?1)", "aa" },
608 { MUA, 0, "((a))(?1)", "aa" },
609 { MUA, 0, "(b|a)(?1)", "aa" },
610 { MUA, 0, "(b|(a))(?1)", "aa" },
611 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
612 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
613 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
614 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
615 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
616 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
617 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
618 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
619 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
620 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
621 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
622 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
623 { MUA, 0, "b|<(?R)*>", "<<b>" },
624 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
625 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
626 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
627 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
628 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
629 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
630 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
631 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
632
633 /* 16 bit specific tests. */
634 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
635 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
636 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
637 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
638 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
639 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
640 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
641 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
642 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
643 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
645 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
646 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
647 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
648 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
649 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
650 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
651 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
652 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
653 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
654 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
655 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
656 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
657 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
658 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
659 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
660 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
661 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
662 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
663 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
664 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
665
666 /* Partial matching. */
667 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
668 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
669 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
670 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
671 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
672 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
673 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
674 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
675
676 /* (*MARK) verb. */
677 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
678 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
679 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
680 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
681 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
682 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
683 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
684 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
685 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
686 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
687 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
688 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
689 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
690 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
691 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
692
693 /* (*COMMIT) verb. */
694 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
695 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
696 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
697 { MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" },
698 { MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" },
699
700 /* (*PRUNE) verb. */
701 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
702 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
703 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
704 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
705 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
706 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
707
708 /* Deep recursion. */
709 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
710 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
711 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
712
713 /* Deep recursion: Stack limit reached. */
714 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
715 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
716 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
717 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
718 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
719
720 { 0, 0, NULL, NULL }
721 };
722
/* Returns a private, heap-allocated copy of PCRE's default character tables.

   Handing the compile functions a malloc'ed copy (rather than letting the
   library use its built-in tables) allows valgrind to report invalid reads
   and writes on the table data.

   mode == 0: return the cached copy, creating it on first use. NULL is
              returned when the default tables cannot be queried or when
              the allocation fails.
   mode != 0: release the cached copy and return NULL. */
static const unsigned char *tables(int mode)
{
	/* Size of the table block in bytes. This value cannot be obtained from
	   pcre_fullinfo; since this is a test program, hard-coding it is
	   acceptable at the moment. */
	enum { TABLES_LENGTH = 1088 };
	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables = NULL;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode != 0) {
		/* free(NULL) is a no-op, so no guard is needed. */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy == NULL) {
		/* Compile an empty pattern with the library defaults only to ask
		   the compiled pattern which tables it uses. */
#if defined SUPPORT_PCRE8
		regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
		if (regex) {
			pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
			pcre_free(regex);
		}
#elif defined SUPPORT_PCRE16
		regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
		if (regex) {
			pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
			pcre16_free(regex);
		}
#elif defined SUPPORT_PCRE32
		regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
		if (regex) {
			pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
			pcre32_free(regex);
		}
#endif
		/* A missing default table shouldn't ever happen; if it does (or if
		   malloc fails) tables_copy stays NULL and NULL is returned. */
		if (default_tables != NULL) {
			tables_copy = (unsigned char *)malloc(TABLES_LENGTH);
			if (tables_copy != NULL)
				memcpy(tables_copy, default_tables, TABLES_LENGTH);
		}
	}

	return tables_copy;
}
785
786 #ifdef SUPPORT_PCRE8
787 static pcre_jit_stack* callback8(void *arg)
788 {
789 return (pcre_jit_stack *)arg;
790 }
791 #endif
792
793 #ifdef SUPPORT_PCRE16
794 static pcre16_jit_stack* callback16(void *arg)
795 {
796 return (pcre16_jit_stack *)arg;
797 }
798 #endif
799
800 #ifdef SUPPORT_PCRE32
801 static pcre32_jit_stack* callback32(void *arg)
802 {
803 return (pcre32_jit_stack *)arg;
804 }
805 #endif
806
807 #ifdef SUPPORT_PCRE8
808 static pcre_jit_stack *stack8;
809
810 static pcre_jit_stack *getstack8(void)
811 {
812 if (!stack8)
813 stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
814 return stack8;
815 }
816
817 static void setstack8(pcre_extra *extra)
818 {
819 if (!extra) {
820 if (stack8)
821 pcre_jit_stack_free(stack8);
822 stack8 = NULL;
823 return;
824 }
825
826 pcre_assign_jit_stack(extra, callback8, getstack8());
827 }
828 #endif /* SUPPORT_PCRE8 */
829
830 #ifdef SUPPORT_PCRE16
831 static pcre16_jit_stack *stack16;
832
833 static pcre16_jit_stack *getstack16(void)
834 {
835 if (!stack16)
836 stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
837 return stack16;
838 }
839
840 static void setstack16(pcre16_extra *extra)
841 {
842 if (!extra) {
843 if (stack16)
844 pcre16_jit_stack_free(stack16);
845 stack16 = NULL;
846 return;
847 }
848
849 pcre16_assign_jit_stack(extra, callback16, getstack16());
850 }
851 #endif /* SUPPORT_PCRE16 */
852
853 #ifdef SUPPORT_PCRE32
854 static pcre32_jit_stack *stack32;
855
856 static pcre32_jit_stack *getstack32(void)
857 {
858 if (!stack32)
859 stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
860 return stack32;
861 }
862
863 static void setstack32(pcre32_extra *extra)
864 {
865 if (!extra) {
866 if (stack32)
867 pcre32_jit_stack_free(stack32);
868 stack32 = NULL;
869 return;
870 }
871
872 pcre32_assign_jit_stack(extra, callback32, getstack32());
873 }
874 #endif /* SUPPORT_PCRE32 */
875
876 #ifdef SUPPORT_PCRE16
877
/* Converts the NUL-terminated UTF-8 string `input` to 16 bit code units.

   output     - receives the code units followed by a zero terminator.
   offsetmap  - optional (may be NULL). For every character converted, the
                entry at the index of its first output unit receives the
                byte offset of that character in `input`; the entry after
                the last written unit receives the offset where conversion
                stopped. The slot belonging to the second half of a
                surrogate pair is skipped and left unmodified.
   max_length - capacity of `output` in code units, terminator included.
                Nothing is written when it is zero or negative.

   Returns the number of code units stored, not counting the terminator.
   Conversion stops early when the next character does not fit.

   No UTF-8 validation is performed, but malformed input is consumed
   safely: a byte that cannot start a sequence (0x80-0xbf, 0xf8-0xff) is
   emitted as a single unit with its own value, and a sequence cut short by
   the terminating NUL yields the bits decoded so far without reading past
   the end of the string. */
static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
{
	const unsigned char *start = (const unsigned char *)input;
	const unsigned char *iptr = start;
	PCRE_UCHAR16 *optr = output;
	unsigned int c;
	int extra;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - start);

		/* The lead byte decides how many continuation bytes follow. */
		c = *iptr++;
		if (c < 0xc0 || c >= 0xf8)
			extra = 0;
		else if (c < 0xe0) {
			c &= 0x1f;
			extra = 1;
		} else if (c < 0xf0) {
			c &= 0x0f;
			extra = 2;
		} else {
			c &= 0x07;
			extra = 3;
		}

		/* Never step over the terminating NUL of a truncated sequence. */
		for (; extra > 0 && *iptr; extra--)
			c = (c << 6) | (*iptr++ & 0x3f);

		if (c < 65536) {
			*optr++ = (PCRE_UCHAR16)c;
			max_length--;
		} else if (max_length <= 2) {
			/* A surrogate pair plus the terminator needs three units. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			c -= 0x10000;
			*optr++ = (PCRE_UCHAR16)(0xd800 | ((c >> 10) & 0x3ff));
			*optr++ = (PCRE_UCHAR16)(0xdc00 | (c & 0x3ff));
			max_length -= 2;
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - start);
	*optr = '\0';
	return (int)(optr - output);
}
925
/* Widens the NUL-terminated byte string `input` to 16 bit units, one unit
   per byte and without any decoding.

   output     - receives the units followed by a zero terminator.
   max_length - capacity of `output` in units, terminator included. Nothing
                is written when it is zero or negative.

   Returns the number of units stored, not counting the terminator; the
   copy is truncated when `input` does not fit. */
static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
{
	const unsigned char *src = (const unsigned char *)input;
	int count = 0;

	if (max_length <= 0)
		return 0;

	/* Always keep one unit free for the terminator. */
	while (src[count] != 0 && count + 1 < max_length) {
		output[count] = src[count];
		count++;
	}
	output[count] = '\0';
	return count;
}

/* Capacity, in 16 bit units, of the scratch buffers below. */
#define REGTEST_MAX_LENGTH16 4096
/* Scratch buffer that receives the 16 bit form of the current pattern and,
   afterwards, of the current subject string. */
static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* Offset map filled by convert_utf8_to_utf16 for the subject string; used to
   translate 16 bit match offsets back to UTF-8 byte offsets. */
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
945
946 #endif /* SUPPORT_PCRE16 */
947
948 #ifdef SUPPORT_PCRE32
949
/* Converts the NUL-terminated UTF-8 string `input` to 32 bit code units,
   one unit per character.

   output     - receives the code units followed by a zero terminator.
   offsetmap  - optional (may be NULL). Entry i receives the byte offset in
                `input` of the character stored in output[i]; the entry
                after the last character receives the offset where
                conversion stopped.
   max_length - capacity of `output` in code units, terminator included.
                Nothing is written when it is zero or negative.

   Returns the number of code units stored, not counting the terminator.

   No UTF-8 validation is performed, but malformed input is consumed
   safely: a byte that cannot start a sequence (0x80-0xbf, 0xf8-0xff) is
   emitted as a single unit with its own value, and a sequence cut short by
   the terminating NUL yields the bits decoded so far without reading past
   the end of the string. */
static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
{
	const unsigned char *start = (const unsigned char *)input;
	const unsigned char *iptr = start;
	PCRE_UCHAR32 *optr = output;
	unsigned int c;
	int extra;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - start);

		/* The lead byte decides how many continuation bytes follow. */
		c = *iptr++;
		if (c < 0xc0 || c >= 0xf8)
			extra = 0;
		else if (c < 0xe0) {
			c &= 0x1f;
			extra = 1;
		} else if (c < 0xf0) {
			c &= 0x0f;
			extra = 2;
		} else {
			c &= 0x07;
			extra = 3;
		}

		/* Never step over the terminating NUL of a truncated sequence. */
		for (; extra > 0 && *iptr; extra--)
			c = (c << 6) | (*iptr++ & 0x3f);

		*optr++ = c;
		max_length--;
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - start);
	*optr = 0;
	return (int)(optr - output);
}
985
/* Widens the NUL-terminated byte string `input` to 32 bit units, one unit
   per byte and without any decoding.

   output     - receives the units followed by a zero terminator.
   max_length - capacity of `output` in units, terminator included. Nothing
                is written when it is zero or negative.

   Returns the number of units stored, not counting the terminator; the
   copy is truncated when `input` does not fit. */
static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
{
	const unsigned char *src = (const unsigned char *)input;
	int count = 0;

	if (max_length <= 0)
		return 0;

	/* Always keep one unit free for the terminator. */
	while (src[count] != 0 && count + 1 < max_length) {
		output[count] = src[count];
		count++;
	}
	output[count] = '\0';
	return count;
}

/* Capacity, in 32 bit units, of the scratch buffers below. */
#define REGTEST_MAX_LENGTH32 4096
/* Scratch buffer that receives the 32 bit form of the current pattern and,
   afterwards, of the current subject string. */
static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* Offset map filled by convert_utf8_to_utf32 for the subject string; used to
   translate 32 bit match offsets back to UTF-8 byte offsets. */
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1005
1006 #endif /* SUPPORT_PCRE32 */
1007
/* Returns 1 when every byte of the NUL-terminated string `input` is in the
   ASCII range (0-127), 0 as soon as a byte above 127 is found. An empty
   string counts as ASCII. */
static int check_ascii(const char *input)
{
	const unsigned char *ptr;

	/* Inspect the bytes as unsigned so values above 127 compare correctly
	   even where plain char is signed; the cast keeps the const qualifier. */
	for (ptr = (const unsigned char *)input; *ptr != 0; ptr++) {
		if (*ptr > 127)
			return 0;
	}
	return 1;
}
1018
1019 static int regression_tests(void)
1020 {
1021 struct regression_test_case *current = regression_test_cases;
1022 const char *error;
1023 char *cpu_info;
1024 int i, err_offs;
1025 int is_successful, is_ascii_pattern, is_ascii_input;
1026 int total = 0;
1027 int successful = 0;
1028 int successful_row = 0;
1029 int counter = 0;
1030 int study_mode;
1031 int utf = 0, ucp = 0;
1032 int disabled_flags = 0;
1033 #ifdef SUPPORT_PCRE8
1034 pcre *re8;
1035 pcre_extra *extra8;
1036 pcre_extra dummy_extra8;
1037 int ovector8_1[32];
1038 int ovector8_2[32];
1039 int return_value8[2];
1040 unsigned char *mark8_1, *mark8_2;
1041 #endif
1042 #ifdef SUPPORT_PCRE16
1043 pcre16 *re16;
1044 pcre16_extra *extra16;
1045 pcre16_extra dummy_extra16;
1046 int ovector16_1[32];
1047 int ovector16_2[32];
1048 int return_value16[2];
1049 PCRE_UCHAR16 *mark16_1, *mark16_2;
1050 int length16;
1051 #endif
1052 #ifdef SUPPORT_PCRE32
1053 pcre32 *re32;
1054 pcre32_extra *extra32;
1055 pcre32_extra dummy_extra32;
1056 int ovector32_1[32];
1057 int ovector32_2[32];
1058 int return_value32[2];
1059 PCRE_UCHAR32 *mark32_1, *mark32_2;
1060 int length32;
1061 #endif
1062
1063 /* This test compares the behaviour of interpreter and JIT. Although disabling
1064 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1065 still considered successful from pcre_jit_test point of view. */
1066
1067 #if defined SUPPORT_PCRE8
1068 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1069 #elif defined SUPPORT_PCRE16
1070 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1071 #elif defined SUPPORT_PCRE32
1072 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1073 #endif
1074
1075 printf("Running JIT regression tests\n");
1076 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1077
1078 #if defined SUPPORT_PCRE8
1079 pcre_config(PCRE_CONFIG_UTF8, &utf);
1080 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1081 #elif defined SUPPORT_PCRE16
1082 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1083 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1084 #elif defined SUPPORT_PCRE16
1085 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1086 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1087 #endif
1088
1089 if (!utf)
1090 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1091 if (!ucp)
1092 disabled_flags |= PCRE_UCP;
1093 #ifdef SUPPORT_PCRE8
1094 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1095 #endif
1096 #ifdef SUPPORT_PCRE16
1097 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1098 #endif
1099 #ifdef SUPPORT_PCRE32
1100 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1101 #endif
1102
1103 while (current->pattern) {
1104 /* printf("\nPattern: %s :\n", current->pattern); */
1105 total++;
1106 if (current->start_offset & F_PROPERTY) {
1107 is_ascii_pattern = 0;
1108 is_ascii_input = 0;
1109 } else {
1110 is_ascii_pattern = check_ascii(current->pattern);
1111 is_ascii_input = check_ascii(current->input);
1112 }
1113
1114 if (current->flags & PCRE_PARTIAL_SOFT)
1115 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1116 else if (current->flags & PCRE_PARTIAL_HARD)
1117 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1118 else
1119 study_mode = PCRE_STUDY_JIT_COMPILE;
1120 error = NULL;
1121 #ifdef SUPPORT_PCRE8
1122 re8 = NULL;
1123 if (!(current->start_offset & F_NO8))
1124 re8 = pcre_compile(current->pattern,
1125 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1126 &error, &err_offs, tables(0));
1127
1128 extra8 = NULL;
1129 if (re8) {
1130 error = NULL;
1131 extra8 = pcre_study(re8, study_mode, &error);
1132 if (!extra8) {
1133 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1134 pcre_free(re8);
1135 re8 = NULL;
1136 }
1137 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1138 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1139 pcre_free_study(extra8);
1140 pcre_free(re8);
1141 re8 = NULL;
1142 }
1143 extra8->flags |= PCRE_EXTRA_MARK;
1144 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1145 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1146 #endif
1147 #ifdef SUPPORT_PCRE16
1148 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1149 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1150 else
1151 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1152
1153 re16 = NULL;
1154 if (!(current->start_offset & F_NO16))
1155 re16 = pcre16_compile(regtest_buf16,
1156 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1157 &error, &err_offs, tables(0));
1158
1159 extra16 = NULL;
1160 if (re16) {
1161 error = NULL;
1162 extra16 = pcre16_study(re16, study_mode, &error);
1163 if (!extra16) {
1164 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1165 pcre16_free(re16);
1166 re16 = NULL;
1167 }
1168 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1169 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1170 pcre16_free_study(extra16);
1171 pcre16_free(re16);
1172 re16 = NULL;
1173 }
1174 extra16->flags |= PCRE_EXTRA_MARK;
1175 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1176 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1177 #endif
1178 #ifdef SUPPORT_PCRE32
1179 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1180 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1181 else
1182 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1183
1184 re32 = NULL;
1185 if (!(current->start_offset & F_NO32))
1186 re32 = pcre32_compile(regtest_buf32,
1187 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1188 &error, &err_offs, tables(0));
1189
1190 extra32 = NULL;
1191 if (re32) {
1192 error = NULL;
1193 extra32 = pcre32_study(re32, study_mode, &error);
1194 if (!extra32) {
1195 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1196 pcre32_free(re32);
1197 re32 = NULL;
1198 }
1199 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1200 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1201 pcre32_free_study(extra32);
1202 pcre32_free(re32);
1203 re32 = NULL;
1204 }
1205 extra32->flags |= PCRE_EXTRA_MARK;
1206 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1207 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1208 #endif
1209
1210 counter++;
1211 if ((counter & 0x3) != 0) {
1212 #ifdef SUPPORT_PCRE8
1213 setstack8(NULL);
1214 #endif
1215 #ifdef SUPPORT_PCRE16
1216 setstack16(NULL);
1217 #endif
1218 #ifdef SUPPORT_PCRE32
1219 setstack32(NULL);
1220 #endif
1221 }
1222
1223 #ifdef SUPPORT_PCRE8
1224 return_value8[0] = -1000;
1225 return_value8[1] = -1000;
1226 for (i = 0; i < 32; ++i)
1227 ovector8_1[i] = -2;
1228 for (i = 0; i < 32; ++i)
1229 ovector8_2[i] = -2;
1230 if (re8) {
1231 mark8_1 = NULL;
1232 mark8_2 = NULL;
1233 extra8->mark = &mark8_1;
1234
1235 if ((counter & 0x1) != 0) {
1236 setstack8(extra8);
1237 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1238 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1239 } else
1240 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1241 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
1242 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1243 dummy_extra8.flags = PCRE_EXTRA_MARK;
1244 if (current->start_offset & F_STUDY) {
1245 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1246 dummy_extra8.study_data = extra8->study_data;
1247 }
1248 dummy_extra8.mark = &mark8_2;
1249 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1250 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1251 }
1252 #endif
1253
1254 #ifdef SUPPORT_PCRE16
1255 return_value16[0] = -1000;
1256 return_value16[1] = -1000;
1257 for (i = 0; i < 32; ++i)
1258 ovector16_1[i] = -2;
1259 for (i = 0; i < 32; ++i)
1260 ovector16_2[i] = -2;
1261 if (re16) {
1262 mark16_1 = NULL;
1263 mark16_2 = NULL;
1264 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1265 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1266 else
1267 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1268 extra16->mark = &mark16_1;
1269 if ((counter & 0x1) != 0) {
1270 setstack16(extra16);
1271 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1272 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1273 } else
1274 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1275 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
1276 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1277 dummy_extra16.flags = PCRE_EXTRA_MARK;
1278 if (current->start_offset & F_STUDY) {
1279 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1280 dummy_extra16.study_data = extra16->study_data;
1281 }
1282 dummy_extra16.mark = &mark16_2;
1283 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1284 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1285 }
1286 #endif
1287
1288 #ifdef SUPPORT_PCRE32
1289 return_value32[0] = -1000;
1290 return_value32[1] = -1000;
1291 for (i = 0; i < 32; ++i)
1292 ovector32_1[i] = -2;
1293 for (i = 0; i < 32; ++i)
1294 ovector32_2[i] = -2;
1295 if (re32) {
1296 mark32_1 = NULL;
1297 mark32_2 = NULL;
1298 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1299 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1300 else
1301 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1302 extra32->mark = &mark32_1;
1303 if ((counter & 0x1) != 0) {
1304 setstack32(extra32);
1305 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1306 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1307 } else
1308 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1309 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
1310 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1311 dummy_extra32.flags = PCRE_EXTRA_MARK;
1312 if (current->start_offset & F_STUDY) {
1313 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1314 dummy_extra32.study_data = extra32->study_data;
1315 }
1316 dummy_extra32.mark = &mark32_2;
1317 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1318 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1319 }
1320 #endif
1321
1322 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1323 return_value8[0], return_value16[0],
1324 ovector8_1[0], ovector8_1[1],
1325 ovector16_1[0], ovector16_1[1],
1326 ovector32_1[0], ovector32_1[1],
1327 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1328
1329 /* If F_DIFF is set, just run the test, but do not compare the results.
1330 Segfaults can still be captured. */
1331
1332 is_successful = 1;
1333 if (!(current->start_offset & F_DIFF)) {
1334 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1335 if (!(current->start_offset & F_FORCECONV)) {
1336 int return_value;
1337
1338 /* All results must be the same. */
1339 #ifdef SUPPORT_PCRE8
1340 if ((return_value = return_value8[0]) != return_value8[1]) {
1341 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1342 return_value8[0], return_value8[1], total, current->pattern, current->input);
1343 is_successful = 0;
1344 } else
1345 #endif
1346 #ifdef SUPPORT_PCRE16
1347 if ((return_value = return_value16[0]) != return_value16[1]) {
1348 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1349 return_value16[0], return_value16[1], total, current->pattern, current->input);
1350 is_successful = 0;
1351 } else
1352 #endif
1353 #ifdef SUPPORT_PCRE32
1354 if ((return_value = return_value32[0]) != return_value32[1]) {
1355 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1356 return_value32[0], return_value32[1], total, current->pattern, current->input);
1357 is_successful = 0;
1358 } else
1359 #endif
1360 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1361 if (return_value8[0] != return_value16[0]) {
1362 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1363 return_value8[0], return_value16[0],
1364 total, current->pattern, current->input);
1365 is_successful = 0;
1366 } else
1367 #endif
1368 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1369 if (return_value8[0] != return_value32[0]) {
1370 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1371 return_value8[0], return_value32[0],
1372 total, current->pattern, current->input);
1373 is_successful = 0;
1374 } else
1375 #endif
1376 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1377 if (return_value16[0] != return_value32[0]) {
1378 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1379 return_value16[0], return_value32[0],
1380 total, current->pattern, current->input);
1381 is_successful = 0;
1382 } else
1383 #endif
1384 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1385 if (return_value == PCRE_ERROR_PARTIAL) {
1386 return_value = 2;
1387 } else {
1388 return_value *= 2;
1389 }
1390 #ifdef SUPPORT_PCRE8
1391 return_value8[0] = return_value;
1392 #endif
1393 #ifdef SUPPORT_PCRE16
1394 return_value16[0] = return_value;
1395 #endif
1396 #ifdef SUPPORT_PCRE32
1397 return_value32[0] = return_value;
1398 #endif
1399 /* Transform back the results. */
1400 if (current->flags & PCRE_UTF8) {
1401 #ifdef SUPPORT_PCRE16
1402 for (i = 0; i < return_value; ++i) {
1403 if (ovector16_1[i] >= 0)
1404 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1405 if (ovector16_2[i] >= 0)
1406 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1407 }
1408 #endif
1409 #ifdef SUPPORT_PCRE32
1410 for (i = 0; i < return_value; ++i) {
1411 if (ovector32_1[i] >= 0)
1412 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1413 if (ovector32_2[i] >= 0)
1414 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1415 }
1416 #endif
1417 }
1418
1419 for (i = 0; i < return_value; ++i) {
1420 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1421 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1422 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1423 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1424 total, current->pattern, current->input);
1425 is_successful = 0;
1426 }
1427 #endif
1428 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1429 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1430 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1431 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1432 total, current->pattern, current->input);
1433 is_successful = 0;
1434 }
1435 #endif
1436 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1437 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1438 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1439 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1440 total, current->pattern, current->input);
1441 is_successful = 0;
1442 }
1443 #endif
1444 }
1445 }
1446 } else
1447 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1448 {
1449 /* Only the two results within each mode (8, 16 or 32 bit) must be equal; results are not compared across modes here. */
1450 #ifdef SUPPORT_PCRE8
1451 if (return_value8[0] != return_value8[1]) {
1452 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1453 return_value8[0], return_value8[1], total, current->pattern, current->input);
1454 is_successful = 0;
1455 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1456 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1457 return_value8[0] = 2;
1458 else
1459 return_value8[0] *= 2;
1460
1461 for (i = 0; i < return_value8[0]; ++i)
1462 if (ovector8_1[i] != ovector8_2[i]) {
1463 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1464 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1465 is_successful = 0;
1466 }
1467 }
1468 #endif
1469
1470 #ifdef SUPPORT_PCRE16
1471 if (return_value16[0] != return_value16[1]) {
1472 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1473 return_value16[0], return_value16[1], total, current->pattern, current->input);
1474 is_successful = 0;
1475 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1476 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1477 return_value16[0] = 2;
1478 else
1479 return_value16[0] *= 2;
1480
1481 for (i = 0; i < return_value16[0]; ++i)
1482 if (ovector16_1[i] != ovector16_2[i]) {
1483 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1484 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1485 is_successful = 0;
1486 }
1487 }
1488 #endif
1489
1490 #ifdef SUPPORT_PCRE32
1491 if (return_value32[0] != return_value32[1]) {
1492 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1493 return_value32[0], return_value32[1], total, current->pattern, current->input);
1494 is_successful = 0;
1495 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1496 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1497 return_value32[0] = 2;
1498 else
1499 return_value32[0] *= 2;
1500
1501 for (i = 0; i < return_value32[0]; ++i)
1502 if (ovector32_1[i] != ovector32_2[i]) {
1503 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1504 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1505 is_successful = 0;
1506 }
1507 }
1508 #endif
1509 }
1510 }
1511
1512 if (is_successful) {
1513 #ifdef SUPPORT_PCRE8
1514 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1515 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1516 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1517 total, current->pattern, current->input);
1518 is_successful = 0;
1519 }
1520
1521 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1522 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1523 total, current->pattern, current->input);
1524 is_successful = 0;
1525 }
1526 }
1527 #endif
1528 #ifdef SUPPORT_PCRE16
1529 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1530 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1531 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1532 total, current->pattern, current->input);
1533 is_successful = 0;
1534 }
1535
1536 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1537 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1538 total, current->pattern, current->input);
1539 is_successful = 0;
1540 }
1541 }
1542 #endif
1543 #ifdef SUPPORT_PCRE32
1544 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1545 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1546 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1547 total, current->pattern, current->input);
1548 is_successful = 0;
1549 }
1550
1551 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1552 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1553 total, current->pattern, current->input);
1554 is_successful = 0;
1555 }
1556 }
1557 #endif
1558 }
1559
1560 if (is_successful) {
1561 #ifdef SUPPORT_PCRE8
1562 if (mark8_1 != mark8_2) {
1563 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1564 total, current->pattern, current->input);
1565 is_successful = 0;
1566 }
1567 #endif
1568 #ifdef SUPPORT_PCRE16
1569 if (mark16_1 != mark16_2) {
1570 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1571 total, current->pattern, current->input);
1572 is_successful = 0;
1573 }
1574 #endif
1575 #ifdef SUPPORT_PCRE32
1576 if (mark32_1 != mark32_2) {
1577 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1578 total, current->pattern, current->input);
1579 is_successful = 0;
1580 }
1581 #endif
1582 }
1583
1584 #ifdef SUPPORT_PCRE8
1585 if (re8) {
1586 pcre_free_study(extra8);
1587 pcre_free(re8);
1588 }
1589 #endif
1590 #ifdef SUPPORT_PCRE16
1591 if (re16) {
1592 pcre16_free_study(extra16);
1593 pcre16_free(re16);
1594 }
1595 #endif
1596 #ifdef SUPPORT_PCRE32
1597 if (re32) {
1598 pcre32_free_study(extra32);
1599 pcre32_free(re32);
1600 }
1601 #endif
1602
1603 if (is_successful) {
1604 successful++;
1605 successful_row++;
1606 printf(".");
1607 if (successful_row >= 60) {
1608 successful_row = 0;
1609 printf("\n");
1610 }
1611 } else
1612 successful_row = 0;
1613
1614 fflush(stdout);
1615 current++;
1616 }
1617 tables(1);
1618 #ifdef SUPPORT_PCRE8
1619 setstack8(NULL);
1620 #endif
1621 #ifdef SUPPORT_PCRE16
1622 setstack16(NULL);
1623 #endif
1624 #ifdef SUPPORT_PCRE32
1625 setstack32(NULL);
1626 #endif
1627
1628 if (total == successful) {
1629 printf("\nAll JIT regression tests are successfully passed.\n");
1630 return 0;
1631 } else {
1632 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1633 return 1;
1634 }
1635 }
1636
1637 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5