/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1112 - (show annotations)
Tue Oct 16 15:57:09 2012 UTC (6 years, 11 months ago) by chpe
File MIME type: text/plain
File size: 69052 byte(s)
pcre32: Fix indentation
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
54 #define PCRE_BUG 0x80000000
55
56 /*
57 Letter characters:
58 \xe6\x92\xad = 0x64ad = 25773 (kanji)
59 Non-letter characters:
60 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
61 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64 Newlines:
65 \xc2\x85 = 0x85 = 133 (Next Line = NEL)
66 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67 Othercase pairs:
68 \xc3\xa9 = 0xe9 = 233 (e')
69 \xc3\x89 = 0xc9 = 201 (E')
70 \xc3\xa1 = 0xe1 = 225 (a')
71 \xc3\x81 = 0xc1 = 193 (A')
72 \xc8\xba = 0x23a = 570
73 \xe2\xb1\xa5 = 0x2c65 = 11365
74 \xe1\xbd\xb8 = 0x1f78 = 8056
75 \xe1\xbf\xb8 = 0x1ff8 = 8184
76 \xf0\x90\x90\x80 = 0x10400 = 66560
77 \xf0\x90\x90\xa8 = 0x10428 = 66600
78 Mark property:
79 \xcc\x8d = 0x30d = 781
80 Special:
81 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86 */
87
/* Runs the regression test table; defined further down in this file. */
static int regression_tests(void);

/*
 * Program entry point.
 *
 * Asks the first available PCRE library width (8, then 16, then 32 bit)
 * whether JIT support is available. Without JIT the program prints a
 * message and returns 1; otherwise it returns the value of
 * regression_tests().
 */
int main(void)
{
	int jit_available = 0;

#if defined SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE16
	pcre16_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE32
	pcre32_config(PCRE_CONFIG_JIT, &jit_available);
#endif

	if (jit_available)
		return regression_tests();

	printf("JIT must be enabled to run pcre_jit_test\n");
	return 1;
}
106
107 /* --------------------------------------------------------------------------------------- */
108
109 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
110 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111 #endif
112
/*
 * Shorthand PCRE option sets used in the first column of the test table.
 * Letters in the name: C = PCRE_CASELESS, M = PCRE_MULTILINE,
 * U = PCRE_UTF8, A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
 */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(PCRE_CASELESS | MUAP)

/*
 * Test-table entries OR these F_* bits into their start_offset value
 * (e.g. "1 | F_NOMATCH"); OFFSET_MASK covers the low 16 bits that lie
 * below every flag.
 * NOTE(review): the flag meanings are presumably what the names suggest
 * (skip a width, expect no match, ...) -- confirm against the test driver.
 */
#define OFFSET_MASK	0xffff
#define F_NO8		(1 << 16)
#define F_NO16		(1 << 17)
/* NOTE(review): same bit as F_NO16, exactly as in the original
   definitions -- confirm that sharing the bit is intended. */
#define F_NO32		(1 << 17)
#define F_NOMATCH	(1 << 18)
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
#define F_PROPERTY	(1 << 21)
129
/* One row of the regression test table. */
struct regression_test_case {
	/* PCRE option bits for this case (e.g. the MUA / CMA shorthands). */
	int flags;
	/* Start offset, with F_* test flags OR'd in by the table entries
	   (written there as e.g. "1 | F_NOMATCH"). */
	int start_offset;
	/* Regular expression, as a C string. */
	const char *pattern;
	/* Subject string the pattern is run against. */
	const char *input;
};
136
137 static struct regression_test_case regression_test_cases[] = {
138 /* Constant strings. */
139 { MUA, 0, "AbC", "AbAbC" },
140 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
141 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
142 { MA, 0, "[^a]", "aAbB" },
143 { CMA, 0, "[^m]", "mMnN" },
144 { MA, 0, "a[^b][^#]", "abacd" },
145 { CMA, 0, "A[^B][^E]", "abacd" },
146 { CMUA, 0, "[^x][^#]", "XxBll" },
147 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
148 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
149 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
150 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
151 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
152 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
153 { MUA, 0, "[axd]", "sAXd" },
154 { CMUA, 0, "[axd]", "sAXd" },
155 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
156 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
157 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
158 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
159 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160 { MUA, 0, "[^a]", "\xc2\x80[]" },
161 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
162 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
163 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
164 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
165 { PCRE_CASELESS, 0, "a1", "Aa1" },
166 { MA, 0, "\\Ca", "cda" },
167 { CMA, 0, "\\Ca", "CDA" },
168 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
169 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
170 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
171 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
174
175 /* Assertions. */
176 { MUA, 0, "\\b[^A]", "A_B#" },
177 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
178 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
179 { MAP, 0, "\\B", "_\xa1" },
180 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
181 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
182 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
183 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
184 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
185 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
186 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
187 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
188 { MA, 1 | F_NOMATCH, "^", "\n" },
189 { 0, 0, "^ab", "ab" },
190 { 0, 0 | F_NOMATCH, "^ab", "aab" },
191 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
192 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
193 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
194 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
196 { 0, 0, "ab$", "ab" },
197 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
198 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
199 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
200 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
201 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
202 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
203 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
204 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
205 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
206 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
207 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
208 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
209 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
210 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
211 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
212 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
213 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
214 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
218 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
219 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
220 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
224 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
225 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
226 { MA, 0, "\\Aa", "aaa" },
227 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
228 { MA, 1, "\\Ga", "aaa" },
229 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
230 { MA, 0, "a\\z", "aaa" },
231 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
232
233 /* Brackets. */
234 { MUA, 0, "(ab|bb|cd)", "bacde" },
235 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
236 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
237 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
238 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
239 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
240
241 /* Greedy and non-greedy ? operators. */
242 { MUA, 0, "(?:a)?a", "laab" },
243 { CMUA, 0, "(A)?A", "llaab" },
244 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
245 { MUA, 0, "(a)?a", "manm" },
246 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
247 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
248 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
249
250 /* Greedy and non-greedy + operators */
251 { MUA, 0, "(aa)+aa", "aaaaaaa" },
252 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
253 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
254 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
255 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
256 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
258
259 /* Greedy and non-greedy * operators */
260 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
261 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
262 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
263 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
264 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
265 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
266 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
267 { MA, 0, "((?:a|)*){0}a", "a" },
268
269 /* Combining ? + * operators */
270 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
271 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
272 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
274 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
275
276 /* Single character iterators. */
277 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
278 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
279 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
280 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
281 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
282 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
283 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
284 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
285 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
286 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
287 { MUA, 0, "(a?+[^b])+", "babaacacb" },
288 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
289 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
290 { CMUA, 0, "[c-f]+k", "DemmFke" },
291 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
292 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
293 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
294 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
295 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
296 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
297 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
298 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
299 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
300 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
301 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
302 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
303 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
304 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
305 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
306 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
307 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
309
310 /* Basic character sets. */
311 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
312 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
313 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
314 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
315 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
316 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
317
318 /* Unicode properties. */
319 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
320 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
321 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
322 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
323 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
324 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
325 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
326 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
327 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
328 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
329 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
330 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
331 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
332 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
333 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
334 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
335 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
336 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
337 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
338 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
339
340 /* Possible empty brackets. */
341 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
342 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
343 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
344 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
345 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
346 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
347 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
348 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
349 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
350 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
351
352 /* Start offset. */
353 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
354 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
355 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
356 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
357
358 /* Newline. */
359 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
360 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
361 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
362
363 /* Any character except newline or any newline. */
364 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
365 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
366 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
367 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
368 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
369 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
370 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
371 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
372 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
373 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
374 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
375 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
376 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
377 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
378 { MUA, 0, "\\R+", "ab\r\n\r" },
379 { MUA, 0, "\\R*", "ab\r\n\r" },
380 { MUA, 0, "\\R*", "\r\n\r" },
381 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
382 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
383 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
384 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
385 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
386 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
387 { MUA, 0, "\\R*\\R\\R", "\n\r" },
388 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
389 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
390
391 /* Atomic groups (no fallback from "next" direction). */
392 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
393 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
394 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
395 "bababcdedefgheijijklmlmnop" },
396 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
397 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
398 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
399 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
400 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
401 { MUA, 0, "(?>x|)*$", "aaa" },
402 { MUA, 0, "(?>(x)|)*$", "aaa" },
403 { MUA, 0, "(?>x|())*$", "aaa" },
404 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
405 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
406 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
407 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
408 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
409 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
410 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
411 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
412 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
413 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
414 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
415 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
416 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
417 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
418 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
419 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
420 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
421 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
422 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
423 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
424 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
425 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
426 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
427 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
428 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
429 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
430
431 /* Possessive quantifiers. */
432 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
433 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
434 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
435 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
436 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
437 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
438 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
439 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
440 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
441 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
442 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
443 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
444 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
445 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
446 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
447 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
448 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
449 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
450 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
451 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
452 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
453 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
454 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
455 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
456 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
457 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
458 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
459 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
460 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
461 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
462 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
463 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
464 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
465 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
466 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
467
468 /* Back references. */
469 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
470 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
471 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
472 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
473 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
474 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
475 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
476 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
477 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
478 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
479 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
480 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
481 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
482 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
483 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
484 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
485 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
486 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
487 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
488 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
489 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
490 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
491 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
492
493 /* Assertions. */
494 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
495 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
496 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
497 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
498 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
499 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
500 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
501 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
502 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
503 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
504 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
505 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
506 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
507 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
508 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
509 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
510 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
511 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
512 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
513 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
514 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
515 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
516 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
517 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
518
519 /* Not empty, ACCEPT, FAIL */
520 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
521 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
522 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
523 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
524 { MUA, 0, "a(*ACCEPT)b", "ab" },
525 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
526 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
527 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
528 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
529 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
530 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
531 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
532 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
533 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
534 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
535 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
536 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
537 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
538 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
539
540 /* Conditional blocks. */
541 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
542 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
543 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
544 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
545 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
548 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
549 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
550 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
551 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
552 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
553 { MUA, 0, "(?(?=a)ab)", "a" },
554 { MUA, 0, "(?(?<!b)c)", "b" },
555 { MUA, 0, "(?(DEFINE)a(b))", "a" },
556 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
557 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
558 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
559 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
560 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
561 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
562 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
563 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
564 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
566 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
567 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
568 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
569 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
570 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
571 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
572 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
573 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
574 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
575
576 /* Set start of match. */
577 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
578 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
579 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
580 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
581 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
582
583 /* First line. */
584 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
585 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
586 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
587 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
588 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
589 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
590 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
591 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
592 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
594 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
595 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
596 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
597 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
598 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
599 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
600 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
601 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
602 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
603
604 /* Recurse. */
605 { MUA, 0, "(a)(?1)", "aa" },
606 { MUA, 0, "((a))(?1)", "aa" },
607 { MUA, 0, "(b|a)(?1)", "aa" },
608 { MUA, 0, "(b|(a))(?1)", "aa" },
609 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
610 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
611 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
612 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
613 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
614 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
615 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
616 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
617 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
618 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
619 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
620 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
621 { MUA, 0, "b|<(?R)*>", "<<b>" },
622 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
623 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
624 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
625 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
626 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
627 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
628 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
629 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
630
631 /* 16 bit specific tests. */
632 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
633 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
634 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
635 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
636 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
637 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
638 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
639 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
640 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
641 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
642 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
643 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
644 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
645 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
646 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
647 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
648 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
649 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
650 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
651 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
652 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
653 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
654 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
655 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
656 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
657 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
658 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
659 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
660 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
661 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
662 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
663
664 /* Partial matching. */
665 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
666 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
667 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
668 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
669 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
670 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
671 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
672 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
673
674 /* (*MARK) verb. */
675 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
676 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
677 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
678 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
679 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
680 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
681 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
682 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
683 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
684 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
685 { MUA, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
686 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
687 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
688 { MUA, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
689
690 /* (*COMMIT) verb. */
691 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
692 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
693 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
694 { MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" },
695 { MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" },
696
697 /* Deep recursion. */
698 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
699 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
700 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
701
702 /* Deep recursion: Stack limit reached. */
703 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
704 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
705 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
706 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
707 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
708
709 { 0, 0, NULL, NULL }
710 };
711
/* Returns a private heap copy of PCRE's default character tables, or
   releases that copy.

   mode == 0: return the cached copy, creating it on first use. NULL is
              returned when the default tables cannot be obtained or when
              the allocation fails.
   mode != 0: free the cached copy (if any) and return NULL.

   The tables are handed to the compile functions through a malloc'ed copy
   so that valgrind can report invalid reads and writes on them. */
static const unsigned char *tables(int mode)
{
	/* Unfortunately the size of the tables cannot be queried through
	   pcre_fullinfo. Since this is a test program, hard-coding it is
	   acceptable at the moment. */
	enum { TABLES_LENGTH = 1088 };
	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode) {
		/* free(NULL) is a no-op, so no guard is required. */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy)
		return tables_copy;

	/* Compile an empty pattern with the first available library and ask
	   it where its default tables live. */
	default_tables = NULL;
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre32_free(regex);
	}
#endif
	/* Shouldn't ever happen. */
	if (!default_tables)
		return NULL;

	tables_copy = (unsigned char *)malloc(TABLES_LENGTH);
	if (!tables_copy)
		return NULL;

	memcpy(tables_copy, default_tables, TABLES_LENGTH);
	return tables_copy;
}
774
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the opaque argument is itself
   the stack, so it is simply handed back with its real type. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *jit_stack = (pcre_jit_stack *)arg;

	return jit_stack;
}
#endif
781
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the opaque argument is itself
   the stack, so it is simply handed back with its real type. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *jit_stack = (pcre16_jit_stack *)arg;

	return jit_stack;
}
#endif
788
#ifdef SUPPORT_PCRE32
/* JIT stack callback for the 32 bit library: the opaque argument is itself
   the stack, so it is simply handed back with its real type. */
static pcre32_jit_stack* callback32(void *arg)
{
	pcre32_jit_stack *jit_stack = (pcre32_jit_stack *)arg;

	return jit_stack;
}
#endif
795
#ifdef SUPPORT_PCRE8
/* Manages the single JIT stack shared by all 8 bit tests.

   extra != NULL: allocate the shared stack on first use and register it
                  (through callback8) for the given study data.
   extra == NULL: free the shared stack, if one exists. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *shared_stack;

	if (extra != NULL) {
		if (shared_stack == NULL)
			shared_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		pcre_assign_jit_stack(extra, callback8, shared_stack);
		return;
	}

	if (shared_stack != NULL) {
		pcre_jit_stack_free(shared_stack);
		shared_stack = NULL;
	}
}
#endif /* SUPPORT_PCRE8 */
814
#ifdef SUPPORT_PCRE16
/* Manages the single JIT stack shared by all 16 bit tests.

   extra != NULL: allocate the shared stack on first use and register it
                  (through callback16) for the given study data.
   extra == NULL: free the shared stack, if one exists. */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *stack;

	if (!extra) {
		if (stack)
			pcre16_jit_stack_free(stack);
		stack = NULL;
		return;
	}

	if (!stack)
		stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
	/* extra is known to be non-NULL here. The allocation result is not
	   checked, so a NULL stack may be registered. */
	pcre16_assign_jit_stack(extra, callback16, stack);
}
#endif /* SUPPORT_PCRE16 */
833
#ifdef SUPPORT_PCRE32
/* Manages the single JIT stack shared by all 32 bit tests.

   extra != NULL: allocate the shared stack on first use and register it
                  (through callback32) for the given study data.
   extra == NULL: free the shared stack, if one exists. */
static void setstack32(pcre32_extra *extra)
{
	static pcre32_jit_stack *stack;

	if (!extra) {
		if (stack)
			pcre32_jit_stack_free(stack);
		stack = NULL;
		return;
	}

	if (!stack)
		stack = pcre32_jit_stack_alloc(1, 1024 * 1024);
	/* extra is known to be non-NULL here. The allocation result is not
	   checked, so a NULL stack may be registered. */
	pcre32_assign_jit_stack(extra, callback32, stack);
}
#endif /* SUPPORT_PCRE32 */
852
853 #ifdef SUPPORT_PCRE16
854
855 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
856 {
857 unsigned char *iptr = (unsigned char*)input;
858 PCRE_UCHAR16 *optr = output;
859 unsigned int c;
860
861 if (max_length == 0)
862 return 0;
863
864 while (*iptr && max_length > 1) {
865 c = 0;
866 if (offsetmap)
867 *offsetmap++ = (int)(iptr - (unsigned char*)input);
868
869 if (!(*iptr & 0x80))
870 c = *iptr++;
871 else if (!(*iptr & 0x20)) {
872 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
873 iptr += 2;
874 } else if (!(*iptr & 0x10)) {
875 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
876 iptr += 3;
877 } else if (!(*iptr & 0x08)) {
878 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
879 iptr += 4;
880 }
881
882 if (c < 65536) {
883 *optr++ = c;
884 max_length--;
885 } else if (max_length <= 2) {
886 *optr = '\0';
887 return (int)(optr - output);
888 } else {
889 c -= 0x10000;
890 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
891 *optr++ = 0xdc00 | (c & 0x3ff);
892 max_length -= 2;
893 if (offsetmap)
894 offsetmap++;
895 }
896 }
897 if (offsetmap)
898 *offsetmap = (int)(iptr - (unsigned char*)input);
899 *optr = '\0';
900 return (int)(optr - output);
901 }
902
903 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
904 {
905 unsigned char *iptr = (unsigned char*)input;
906 PCRE_UCHAR16 *optr = output;
907
908 if (max_length == 0)
909 return 0;
910
911 while (*iptr && max_length > 1) {
912 *optr++ = *iptr++;
913 max_length--;
914 }
915 *optr = '\0';
916 return (int)(optr - output);
917 }
918
/* Scratch storage for the 16 bit tests. REGTEST_MAX_LENGTH16 is the
   capacity, in elements, of both arrays. regtest_buf16 receives the
   converted pattern or subject string. regtest_offsetmap16 is the offset
   map filled by convert_utf8_to_utf16. */
919 #define REGTEST_MAX_LENGTH16 4096
920 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
921 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
922
923 #endif /* SUPPORT_PCRE16 */
924
925 #ifdef SUPPORT_PCRE32
926
927 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
928 {
929 unsigned char *iptr = (unsigned char*)input;
930 PCRE_UCHAR32 *optr = output;
931 unsigned int c;
932
933 if (max_length == 0)
934 return 0;
935
936 while (*iptr && max_length > 1) {
937 c = 0;
938 if (offsetmap)
939 *offsetmap++ = (int)(iptr - (unsigned char*)input);
940
941 if (!(*iptr & 0x80))
942 c = *iptr++;
943 else if (!(*iptr & 0x20)) {
944 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
945 iptr += 2;
946 } else if (!(*iptr & 0x10)) {
947 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
948 iptr += 3;
949 } else if (!(*iptr & 0x08)) {
950 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
951 iptr += 4;
952 }
953
954 *optr++ = c;
955 max_length--;
956 }
957 if (offsetmap)
958 *offsetmap = (int)(iptr - (unsigned char*)input);
959 *optr = 0;
960 return (int)(optr - output);
961 }
962
963 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
964 {
965 unsigned char *iptr = (unsigned char*)input;
966 PCRE_UCHAR32 *optr = output;
967
968 if (max_length == 0)
969 return 0;
970
971 while (*iptr && max_length > 1) {
972 *optr++ = *iptr++;
973 max_length--;
974 }
975 *optr = '\0';
976 return (int)(optr - output);
977 }
978
/* Scratch storage for the 32 bit tests. REGTEST_MAX_LENGTH32 is the
   capacity, in elements, of both arrays. regtest_buf32 receives the
   converted pattern or subject string. regtest_offsetmap32 is the offset
   map filled by convert_utf8_to_utf32. */
979 #define REGTEST_MAX_LENGTH32 4096
980 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
981 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
982
983 #endif /* SUPPORT_PCRE32 */
984
/* Reports whether a NUL-terminated string is pure 7 bit ASCII.

   Returns 1 when every byte is at most 127 (the empty string included),
   and 0 as soon as a larger byte is found. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 128)
			return 0;
	}
	return 1;
}
995
996 static int regression_tests(void)
997 {
998 struct regression_test_case *current = regression_test_cases;
999 const char *error;
1000 char *cpu_info;
1001 int i, err_offs;
1002 int is_successful, is_ascii_pattern, is_ascii_input;
1003 int total = 0;
1004 int successful = 0;
1005 int successful_row = 0;
1006 int counter = 0;
1007 int study_mode;
1008 int utf = 0, ucp = 0;
1009 int disabled_flags = 0;
1010 #ifdef SUPPORT_PCRE8
1011 pcre *re8;
1012 pcre_extra *extra8;
1013 pcre_extra dummy_extra8;
1014 int ovector8_1[32];
1015 int ovector8_2[32];
1016 int return_value8[2];
1017 unsigned char *mark8_1, *mark8_2;
1018 #endif
1019 #ifdef SUPPORT_PCRE16
1020 pcre16 *re16;
1021 pcre16_extra *extra16;
1022 pcre16_extra dummy_extra16;
1023 int ovector16_1[32];
1024 int ovector16_2[32];
1025 int return_value16[2];
1026 PCRE_UCHAR16 *mark16_1, *mark16_2;
1027 int length16;
1028 #endif
1029 #ifdef SUPPORT_PCRE32
1030 pcre32 *re32;
1031 pcre32_extra *extra32;
1032 pcre32_extra dummy_extra32;
1033 int ovector32_1[32];
1034 int ovector32_2[32];
1035 int return_value32[2];
1036 PCRE_UCHAR32 *mark32_1, *mark32_2;
1037 int length32;
1038 #endif
1039
1040 /* This test compares the behaviour of interpreter and JIT. Although disabling
1041 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1042 still considered successful from pcre_jit_test point of view. */
1043
1044 #if defined SUPPORT_PCRE8
1045 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1046 #elif defined SUPPORT_PCRE16
1047 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1048 #elif defined SUPPORT_PCRE32
1049 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1050 #endif
1051
1052 printf("Running JIT regression tests\n");
1053 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1054
1055 #if defined SUPPORT_PCRE8
1056 pcre_config(PCRE_CONFIG_UTF8, &utf);
1057 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1058 #elif defined SUPPORT_PCRE16
1059 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1060 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1061 #elif defined SUPPORT_PCRE16
1062 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1063 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1064 #endif
1065
1066 if (!utf)
1067 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1068 if (!ucp)
1069 disabled_flags |= PCRE_UCP;
1070 #ifdef SUPPORT_PCRE8
1071 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1072 #endif
1073 #ifdef SUPPORT_PCRE16
1074 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1075 #endif
1076 #ifdef SUPPORT_PCRE32
1077 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1078 #endif
1079
1080 while (current->pattern) {
1081 /* printf("\nPattern: %s :\n", current->pattern); */
1082 total++;
1083 if (current->start_offset & F_PROPERTY) {
1084 is_ascii_pattern = 0;
1085 is_ascii_input = 0;
1086 } else {
1087 is_ascii_pattern = check_ascii(current->pattern);
1088 is_ascii_input = check_ascii(current->input);
1089 }
1090
1091 if (current->flags & PCRE_PARTIAL_SOFT)
1092 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1093 else if (current->flags & PCRE_PARTIAL_HARD)
1094 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1095 else
1096 study_mode = PCRE_STUDY_JIT_COMPILE;
1097 error = NULL;
1098 #ifdef SUPPORT_PCRE8
1099 re8 = NULL;
1100 if (!(current->start_offset & F_NO8))
1101 re8 = pcre_compile(current->pattern,
1102 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1103 &error, &err_offs, tables(0));
1104
1105 extra8 = NULL;
1106 if (re8) {
1107 error = NULL;
1108 extra8 = pcre_study(re8, study_mode, &error);
1109 if (!extra8) {
1110 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1111 pcre_free(re8);
1112 re8 = NULL;
1113 }
1114 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1115 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1116 pcre_free_study(extra8);
1117 pcre_free(re8);
1118 re8 = NULL;
1119 }
1120 extra8->flags |= PCRE_EXTRA_MARK;
1121 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1122 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1123 #endif
1124 #ifdef SUPPORT_PCRE16
1125 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1126 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1127 else
1128 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1129
1130 re16 = NULL;
1131 if (!(current->start_offset & F_NO16))
1132 re16 = pcre16_compile(regtest_buf16,
1133 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1134 &error, &err_offs, tables(0));
1135
1136 extra16 = NULL;
1137 if (re16) {
1138 error = NULL;
1139 extra16 = pcre16_study(re16, study_mode, &error);
1140 if (!extra16) {
1141 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1142 pcre16_free(re16);
1143 re16 = NULL;
1144 }
1145 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1146 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1147 pcre16_free_study(extra16);
1148 pcre16_free(re16);
1149 re16 = NULL;
1150 }
1151 extra16->flags |= PCRE_EXTRA_MARK;
1152 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1153 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1154 #endif
1155 #ifdef SUPPORT_PCRE32
1156 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1157 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1158 else
1159 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1160
1161 re32 = NULL;
1162 if (!(current->start_offset & F_NO32))
1163 re32 = pcre32_compile(regtest_buf32,
1164 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1165 &error, &err_offs, tables(0));
1166
1167 extra32 = NULL;
1168 if (re32) {
1169 error = NULL;
1170 extra32 = pcre32_study(re32, study_mode, &error);
1171 if (!extra32) {
1172 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1173 pcre32_free(re32);
1174 re32 = NULL;
1175 }
1176 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1177 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1178 pcre32_free_study(extra32);
1179 pcre32_free(re32);
1180 re32 = NULL;
1181 }
1182 extra32->flags |= PCRE_EXTRA_MARK;
1183 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1184 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1185 #endif
1186
1187 counter++;
1188 if ((counter & 0x3) != 0) {
1189 #ifdef SUPPORT_PCRE8
1190 setstack8(NULL);
1191 #endif
1192 #ifdef SUPPORT_PCRE16
1193 setstack16(NULL);
1194 #endif
1195 #ifdef SUPPORT_PCRE32
1196 setstack32(NULL);
1197 #endif
1198 }
1199
1200 #ifdef SUPPORT_PCRE8
1201 return_value8[0] = -1000;
1202 return_value8[1] = -1000;
1203 for (i = 0; i < 32; ++i)
1204 ovector8_1[i] = -2;
1205 for (i = 0; i < 32; ++i)
1206 ovector8_2[i] = -2;
1207 if (re8) {
1208 mark8_1 = NULL;
1209 mark8_2 = NULL;
1210 setstack8(extra8);
1211 extra8->mark = &mark8_1;
1212 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1213 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1214 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1215 dummy_extra8.flags = PCRE_EXTRA_MARK;
1216 dummy_extra8.mark = &mark8_2;
1217 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1218 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1219 }
1220 #endif
1221
1222 #ifdef SUPPORT_PCRE16
1223 return_value16[0] = -1000;
1224 return_value16[1] = -1000;
1225 for (i = 0; i < 32; ++i)
1226 ovector16_1[i] = -2;
1227 for (i = 0; i < 32; ++i)
1228 ovector16_2[i] = -2;
1229 if (re16) {
1230 mark16_1 = NULL;
1231 mark16_2 = NULL;
1232 setstack16(extra16);
1233 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1234 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1235 else
1236 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1237 extra16->mark = &mark16_1;
1238 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1239 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1240 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1241 dummy_extra16.flags = PCRE_EXTRA_MARK;
1242 dummy_extra16.mark = &mark16_2;
1243 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1244 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1245 }
1246 #endif
1247
1248 #ifdef SUPPORT_PCRE32
1249 return_value32[0] = -1000;
1250 return_value32[1] = -1000;
1251 for (i = 0; i < 32; ++i)
1252 ovector32_1[i] = -2;
1253 for (i = 0; i < 32; ++i)
1254 ovector32_2[i] = -2;
1255 if (re32) {
1256 mark32_1 = NULL;
1257 mark32_2 = NULL;
1258 setstack32(extra32);
1259 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1260 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1261 else
1262 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1263 extra32->mark = &mark32_1;
1264 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1265 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1266 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1267 dummy_extra32.flags = PCRE_EXTRA_MARK;
1268 dummy_extra32.mark = &mark32_2;
1269 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1270 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1271 }
1272 #endif
1273
1274 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1275 * return_value8[0], return_value16[0],
1276 * ovector8_1[0], ovector8_1[1],
1277 * ovector16_1[0], ovector16_1[1],
1278 * ovector32_1[0], ovector32_1[1],
1279 * (current->flags & PCRE_CASELESS) ? "C" : ""); */
1280
1281 /* If F_DIFF is set, just run the test, but do not compare the results.
1282 Segfaults can still be captured. */
1283
1284 is_successful = 1;
1285 if (!(current->start_offset & F_DIFF)) {
1286 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1287 if (!(current->start_offset & F_FORCECONV)) {
1288 int return_value;
1289
1290 /* All results must be the same. */
1291 #ifdef SUPPORT_PCRE8
1292 if ((return_value = return_value8[0]) != return_value8[1]) {
1293 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1294 return_value8[0], return_value8[1], total, current->pattern, current->input);
1295 is_successful = 0;
1296 } else
1297 #endif
1298 #ifdef SUPPORT_PCRE16
1299 if ((return_value = return_value16[0]) != return_value16[1]) {
1300 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1301 return_value16[0], return_value16[1], total, current->pattern, current->input);
1302 is_successful = 0;
1303 } else
1304 #endif
1305 #ifdef SUPPORT_PCRE32
1306 if ((return_value = return_value32[0]) != return_value32[1]) {
1307 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1308 return_value32[0], return_value32[1], total, current->pattern, current->input);
1309 is_successful = 0;
1310 } else
1311 #endif
1312 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1313 if (return_value8[0] != return_value16[0]) {
1314 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1315 return_value8[0], return_value16[0],
1316 total, current->pattern, current->input);
1317 is_successful = 0;
1318 } else
1319 #endif
1320 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1321 if (return_value8[0] != return_value32[0]) {
1322 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1323 return_value8[0], return_value32[0],
1324 total, current->pattern, current->input);
1325 is_successful = 0;
1326 } else
1327 #endif
1328 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1329 if (return_value16[0] != return_value32[0]) {
1330 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1331 return_value16[0], return_value32[0],
1332 total, current->pattern, current->input);
1333 is_successful = 0;
1334 } else
1335 #endif
1336 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1337 if (return_value == PCRE_ERROR_PARTIAL) {
1338 return_value = 2;
1339 } else {
1340 return_value *= 2;
1341 }
1342 #ifdef SUPPORT_PCRE8
1343 return_value8[0] = return_value;
1344 #endif
1345 #ifdef SUPPORT_PCRE16
1346 return_value16[0] = return_value;
1347 #endif
1348 #ifdef SUPPORT_PCRE32
1349 return_value32[0] = return_value;
1350 #endif
1351 /* Transform back the results. */
1352 if (current->flags & PCRE_UTF8) {
1353 #ifdef SUPPORT_PCRE16
1354 for (i = 0; i < return_value; ++i) {
1355 if (ovector16_1[i] >= 0)
1356 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1357 if (ovector16_2[i] >= 0)
1358 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1359 }
1360 #endif
1361 #ifdef SUPPORT_PCRE32
1362 for (i = 0; i < return_value; ++i) {
1363 if (ovector32_1[i] >= 0)
1364 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1365 if (ovector32_2[i] >= 0)
1366 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1367 }
1368 #endif
1369 }
1370
1371 for (i = 0; i < return_value; ++i) {
1372 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1373 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1374 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1375 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1376 total, current->pattern, current->input);
1377 is_successful = 0;
1378 }
1379 #endif
1380 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1381 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1382 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1383 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1384 total, current->pattern, current->input);
1385 is_successful = 0;
1386 }
1387 #endif
1388 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1389 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1390 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1391 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1392 total, current->pattern, current->input);
1393 is_successful = 0;
1394 }
1395 #endif
1396 }
1397 }
1398 } else
1399 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1400 {
1401 /* Only the 8 bit and 16 bit results must be equal. */
1402 #ifdef SUPPORT_PCRE8
1403 if (return_value8[0] != return_value8[1]) {
1404 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1405 return_value8[0], return_value8[1], total, current->pattern, current->input);
1406 is_successful = 0;
1407 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1408 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1409 return_value8[0] = 2;
1410 else
1411 return_value8[0] *= 2;
1412
1413 for (i = 0; i < return_value8[0]; ++i)
1414 if (ovector8_1[i] != ovector8_2[i]) {
1415 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1416 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1417 is_successful = 0;
1418 }
1419 }
1420 #endif
1421
1422 #ifdef SUPPORT_PCRE16
1423 if (return_value16[0] != return_value16[1]) {
1424 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1425 return_value16[0], return_value16[1], total, current->pattern, current->input);
1426 is_successful = 0;
1427 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1428 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1429 return_value16[0] = 2;
1430 else
1431 return_value16[0] *= 2;
1432
1433 for (i = 0; i < return_value16[0]; ++i)
1434 if (ovector16_1[i] != ovector16_2[i]) {
1435 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1436 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1437 is_successful = 0;
1438 }
1439 }
1440 #endif
1441
1442 #ifdef SUPPORT_PCRE32
1443 if (return_value32[0] != return_value32[1]) {
1444 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1445 return_value32[0], return_value32[1], total, current->pattern, current->input);
1446 is_successful = 0;
1447 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1448 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1449 return_value32[0] = 2;
1450 else
1451 return_value32[0] *= 2;
1452
1453 for (i = 0; i < return_value32[0]; ++i)
1454 if (ovector32_1[i] != ovector32_2[i]) {
1455 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1456 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1457 is_successful = 0;
1458 }
1459 }
1460 #endif
1461 }
1462 }
1463
1464 if (is_successful) {
1465 #ifdef SUPPORT_PCRE8
1466 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1467 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1468 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1469 total, current->pattern, current->input);
1470 is_successful = 0;
1471 }
1472
1473 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1474 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1475 total, current->pattern, current->input);
1476 is_successful = 0;
1477 }
1478 }
1479 #endif
1480 #ifdef SUPPORT_PCRE16
1481 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1482 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1483 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1484 total, current->pattern, current->input);
1485 is_successful = 0;
1486 }
1487
1488 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1489 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1490 total, current->pattern, current->input);
1491 is_successful = 0;
1492 }
1493 }
1494 #endif
1495 #ifdef SUPPORT_PCRE32
1496 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1497 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1498 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1499 total, current->pattern, current->input);
1500 is_successful = 0;
1501 }
1502
1503 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1504 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1505 total, current->pattern, current->input);
1506 is_successful = 0;
1507 }
1508 }
1509 #endif
1510 }
1511
1512 if (is_successful) {
1513 #ifdef SUPPORT_PCRE8
1514 if (mark8_1 != mark8_2) {
1515 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1516 total, current->pattern, current->input);
1517 is_successful = 0;
1518 }
1519 #endif
1520 #ifdef SUPPORT_PCRE16
1521 if (mark16_1 != mark16_2) {
1522 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1523 total, current->pattern, current->input);
1524 is_successful = 0;
1525 }
1526 #endif
1527 #ifdef SUPPORT_PCRE32
1528 if (mark32_1 != mark32_2) {
1529 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1530 total, current->pattern, current->input);
1531 is_successful = 0;
1532 }
1533 #endif
1534 }
1535
1536 #ifdef SUPPORT_PCRE8
1537 if (re8) {
1538 pcre_free_study(extra8);
1539 pcre_free(re8);
1540 }
1541 #endif
1542 #ifdef SUPPORT_PCRE16
1543 if (re16) {
1544 pcre16_free_study(extra16);
1545 pcre16_free(re16);
1546 }
1547 #endif
1548 #ifdef SUPPORT_PCRE32
1549 if (re32) {
1550 pcre32_free_study(extra32);
1551 pcre32_free(re32);
1552 }
1553 #endif
1554
1555 if (is_successful) {
1556 successful++;
1557 successful_row++;
1558 printf(".");
1559 if (successful_row >= 60) {
1560 successful_row = 0;
1561 printf("\n");
1562 }
1563 } else
1564 successful_row = 0;
1565
1566 fflush(stdout);
1567 current++;
1568 }
1569 tables(1);
1570 #ifdef SUPPORT_PCRE8
1571 setstack8(NULL);
1572 #endif
1573 #ifdef SUPPORT_PCRE16
1574 setstack16(NULL);
1575 #endif
1576 #ifdef SUPPORT_PCRE32
1577 setstack32(NULL);
1578 #endif
1579
1580 if (total == successful) {
1581 printf("\nAll JIT regression tests are successfully passed.\n");
1582 return 0;
1583 } else {
1584 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1585 return 1;
1586 }
1587 }
1588
1589 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5