/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1275 - (show annotations)
Sun Mar 10 05:32:10 2013 UTC (6 years, 6 months ago) by zherczeg
File MIME type: text/plain
File size: 66311 byte(s)
Experimental support of (*SKIP) backtracking verb in the JIT compiler.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51
52 #include "pcre_internal.h"
53
54 #define PCRE_BUG 0x80000000 /* NOTE(review): not referenced in the visible part of this file; purpose to be confirmed against its users. */
55
56 /*
57 Letter characters:
58 \xe6\x92\xad = 0x64ad = 25773 (kanji)
59 Non-letter characters:
60 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
61 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64 Newlines:
65 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
66 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67 Othercase pairs:
68 \xc3\xa9 = 0xe9 = 233 (e')
69 \xc3\x89 = 0xc9 = 201 (E')
70 \xc3\xa1 = 0xe1 = 225 (a')
71 \xc3\x81 = 0xc1 = 193 (A')
72 \xc8\xba = 0x23a = 570
73 \xe2\xb1\xa5 = 0x2c65 = 11365
74 \xe1\xbd\xb8 = 0x1f78 = 8056
75 \xe1\xbf\xb8 = 0x1ff8 = 8184
76 \xf0\x90\x90\x80 = 0x10400 = 66560
77 \xf0\x90\x90\xa8 = 0x10428 = 66600
78 Mark property:
79 \xcc\x8d = 0x30d = 781
80 Special:
81 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86 */
87
88 static int regression_tests(void); /* Forward declaration; main() returns this function's result. */
89
90 int main(void) /* Entry point: checks that the library reports JIT support, then runs the regression tests. */
91 {
92 int jit = 0; /* Remains 0 unless the config call below stores a non-zero value through &jit. */
93 #if defined SUPPORT_PCRE8 /* Only the first enabled library width (8, then 16, then 32) is queried. */
94 pcre_config(PCRE_CONFIG_JIT, &jit); /* The call's own return value is not checked. */
95 #elif defined SUPPORT_PCRE16
96 pcre16_config(PCRE_CONFIG_JIT, &jit);
97 #elif defined SUPPORT_PCRE32
98 pcre32_config(PCRE_CONFIG_JIT, &jit);
99 #endif
100 if (!jit) { /* No JIT reported: print a message and exit with status 1 without running any test. */
101 printf("JIT must be enabled to run pcre_jit_test\n");
102 return 1;
103 }
104 return regression_tests(); /* The process exit status is whatever the test run returns. */
105 }
106
107 /* --------------------------------------------------------------------------------------- */
108
109 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32) /* Build-time guard: at least one library width must be enabled. */
110 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111 #endif
112
113 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF) /* Option-set shorthands for the test table. Letters: C = CASELESS, M = MULTILINE, U = UTF8, A = NEWLINE_ANYCRLF, P = UCP. */
114 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
115 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
116 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
117 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF) /* Same as MUA without PCRE_UTF8. */
118 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
119 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
120
121 #define OFFSET_MASK 0x00ffff /* Low 16 bits; every F_* bit below lies above this mask. NOTE(review): presumably extracts the numeric offset from the start_offset field - confirm against the test driver. */
122 #define F_NO8 0x010000 /* The F_* bits are OR-ed into the second (start_offset) initializer of a table row, e.g. "0 | F_NOMATCH". */
123 #define F_NO16 0x020000
124 #define F_NO32 0x020000 /* NOTE(review): identical value to F_NO16, so the two flags cannot be told apart; confirm whether sharing a bit is intended. */
125 #define F_NOMATCH 0x040000 /* NOTE(review): presumably marks rows that are expected not to match; the code that reads it is not visible here. */
126 #define F_DIFF 0x080000
127 #define F_FORCECONV 0x100000
128 #define F_PROPERTY 0x200000 /* In the visible table this is set on rows whose pattern uses \p, \P or \X. */
129 #define F_STUDY 0x400000
130
131 struct regression_test_case { /* One row of the regression test table; rows are written as positional initializers, so field order matters. */
132 int flags; /* First initializer: PCRE_* option bits, usually via a shorthand such as MUA or CMA. */
133 int start_offset; /* Second initializer: a small number, optionally OR-ed with F_* bits (e.g. "1 | F_NOMATCH"). */
134 const char *pattern; /* Third initializer: the regular expression as a C string literal. */
135 const char *input; /* Fourth initializer: the text the pattern is run against. */
136 };
137
138 static struct regression_test_case regression_test_cases[] = {
139 /* Constant strings. */
140 { MUA, 0, "AbC", "AbAbC" },
141 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
142 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
143 { MA, 0, "[^a]", "aAbB" },
144 { CMA, 0, "[^m]", "mMnN" },
145 { MA, 0, "a[^b][^#]", "abacd" },
146 { CMA, 0, "A[^B][^E]", "abacd" },
147 { CMUA, 0, "[^x][^#]", "XxBll" },
148 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
149 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
150 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
151 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
152 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
153 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
154 { MUA, 0, "[axd]", "sAXd" },
155 { CMUA, 0, "[axd]", "sAXd" },
156 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
157 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
158 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
159 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
161 { MUA, 0, "[^a]", "\xc2\x80[]" },
162 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
163 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
164 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
165 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
166 { PCRE_CASELESS, 0, "a1", "Aa1" },
167 { MA, 0, "\\Ca", "cda" },
168 { CMA, 0, "\\Ca", "CDA" },
169 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
170 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
171 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
174 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
175
176 /* Assertions. */
177 { MUA, 0, "\\b[^A]", "A_B#" },
178 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
179 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
180 { MAP, 0, "\\B", "_\xa1" },
181 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
182 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
183 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
184 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
185 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
186 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
187 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
188 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
189 { MA, 1 | F_NOMATCH, "^", "\n" },
190 { 0, 0, "^ab", "ab" },
191 { 0, 0 | F_NOMATCH, "^ab", "aab" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
197 { 0, 0, "ab$", "ab" },
198 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
199 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
200 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
201 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
202 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
203 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
204 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
205 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
206 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
207 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
208 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
209 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
210 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
211 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
212 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
213 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
214 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
215 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
218 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
219 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
220 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
225 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
226 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
227 { MA, 0, "\\Aa", "aaa" },
228 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
229 { MA, 1, "\\Ga", "aaa" },
230 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
231 { MA, 0, "a\\z", "aaa" },
232 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
233
234 /* Brackets. */
235 { MUA, 0, "(ab|bb|cd)", "bacde" },
236 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
237 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
238 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
239 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
240 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
241
242 /* Greedy and non-greedy ? operators. */
243 { MUA, 0, "(?:a)?a", "laab" },
244 { CMUA, 0, "(A)?A", "llaab" },
245 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
246 { MUA, 0, "(a)?a", "manm" },
247 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
248 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
249 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
250
251 /* Greedy and non-greedy + operators */
252 { MUA, 0, "(aa)+aa", "aaaaaaa" },
253 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
254 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
255 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
256 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
258 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
259
260 /* Greedy and non-greedy * operators */
261 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
262 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
263 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
264 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
265 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
266 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
267 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
268 { MA, 0, "((?:a|)*){0}a", "a" },
269
270 /* Combining ? + * operators */
271 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
272 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
274 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
275 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
276
277 /* Single character iterators. */
278 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
279 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
280 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
281 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
282 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
283 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
284 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
285 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
286 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
287 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
288 { MUA, 0, "(a?+[^b])+", "babaacacb" },
289 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
290 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
291 { CMUA, 0, "[c-f]+k", "DemmFke" },
292 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
293 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
294 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
295 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
296 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
297 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
298 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
299 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
300 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
301 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
302 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
303 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
304 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
305 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
306 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
307 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
309 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
310
311 /* Basic character sets. */
312 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
313 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
314 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
315 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
316 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
317 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
318
319 /* Unicode properties. */
320 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
321 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
322 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
323 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
324 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
325 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
326 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
327 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
328 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
329 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
330 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
331 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
332 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
333 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
334 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
335 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
336 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
337 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
338 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
339 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
340
341 /* Possible empty brackets. */
342 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
343 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
344 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
345 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
346 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
347 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
348 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
349 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
350 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
351 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
352
353 /* Start offset. */
354 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
355 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
356 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
357 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
358
359 /* Newline. */
360 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
361 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
362 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
363
364 /* Any character except newline or any newline. */
365 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
366 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
367 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
368 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
369 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
370 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
371 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
372 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
373 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
374 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
375 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
376 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
377 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
378 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
379 { MUA, 0, "\\R+", "ab\r\n\r" },
380 { MUA, 0, "\\R*", "ab\r\n\r" },
381 { MUA, 0, "\\R*", "\r\n\r" },
382 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
383 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
384 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
385 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
386 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
387 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
388 { MUA, 0, "\\R*\\R\\R", "\n\r" },
389 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
390 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
391
392 /* Atomic groups (no fallback from "next" direction). */
393 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
394 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
395 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
396 "bababcdedefgheijijklmlmnop" },
397 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
398 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
399 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
400 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
401 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
402 { MUA, 0, "(?>x|)*$", "aaa" },
403 { MUA, 0, "(?>(x)|)*$", "aaa" },
404 { MUA, 0, "(?>x|())*$", "aaa" },
405 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
406 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
407 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
408 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
409 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
410 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
411 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
412 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
413 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
414 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
415 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
416 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
417 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
418 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
419 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
420 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
421 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
422 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
423 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
424 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
425 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
426 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
427 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
428 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
429 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
430 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
431
432 /* Possessive quantifiers. */
433 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
434 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
435 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
436 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
437 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
438 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
439 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
440 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
441 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
442 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
443 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
444 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
445 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
446 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
447 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
448 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
449 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
450 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
451 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
452 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
453 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
454 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
455 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
456 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
457 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
458 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
459 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
460 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
461 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
462 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
463 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
464 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
465 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
466 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
467 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
468
469 /* Back references. */
470 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
471 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
472 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
473 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
474 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
475 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
476 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
477 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
478 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
479 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
480 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
481 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
482 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
483 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
484 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
485 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
486 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
487 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
488 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
489 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
490 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
491 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
492 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
493
494 /* Assertions. */
495 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
496 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
497 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
498 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
499 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
500 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
501 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
502 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
503 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
504 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
505 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
506 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
507 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
508 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
509 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
510 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
511 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
512 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
513 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
514 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
515 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
516 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
517 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
518 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
519
520 /* Not empty, ACCEPT, FAIL */
521 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
522 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
523 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
524 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
525 { MUA, 0, "a(*ACCEPT)b", "ab" },
526 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
527 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
528 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
529 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
530 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
531 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
532 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
533 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
534 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
535 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
536 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
537 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
538 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
539 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
540
541 /* Conditional blocks. */
542 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
543 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
544 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
545 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
548 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
549 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
550 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
551 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
552 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
553 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
554 { MUA, 0, "(?(?=a)ab)", "a" },
555 { MUA, 0, "(?(?<!b)c)", "b" },
556 { MUA, 0, "(?(DEFINE)a(b))", "a" },
557 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
558 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
559 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
560 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
561 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
562 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
563 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
564 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
566 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
567 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
568 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
569 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
570 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
571 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
572 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
573 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
574 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
575 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
576 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
577
578 /* Set start of match. */
579 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
580 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
581 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
582 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
583 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
584
585 /* First line. */
586 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
587 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
588 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
589 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
590 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
591 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
592 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
593 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
594 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
595 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
596 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
597 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
598 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
599 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
600 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
601 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
602 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
603 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
604 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
605
606 /* Recurse. */
607 { MUA, 0, "(a)(?1)", "aa" },
608 { MUA, 0, "((a))(?1)", "aa" },
609 { MUA, 0, "(b|a)(?1)", "aa" },
610 { MUA, 0, "(b|(a))(?1)", "aa" },
611 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
612 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
613 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
614 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
615 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
616 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
617 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
618 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
619 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
620 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
621 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
622 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
623 { MUA, 0, "b|<(?R)*>", "<<b>" },
624 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
625 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
626 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
627 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
628 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
629 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
630 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
631 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
632
633 /* 16 bit specific tests. */
634 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
635 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
636 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
637 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
638 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
639 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
640 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
641 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
642 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
643 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
645 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
646 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
647 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
648 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
649 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
650 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
651 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
652 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
653 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
654 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
655 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
656 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
657 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
658 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
659 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
660 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
661 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
662 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
663 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
664 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
665
666 /* Partial matching. */
667 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
668 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
669 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
670 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
671 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
672 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
673 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
674 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
675
676 /* (*MARK) verb. */
677 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
678 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
679 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
680 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
681 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
682 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
683 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
684 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
685 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
686 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
687 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
688 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
689 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
690 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
691 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
692
693 /* (*COMMIT) verb. */
694 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
695 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
696 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
697 { MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" },
698 { MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" },
699 { MUA, 0, "(?=(a)(*COMMIT)b)|ac", "ac" },
700 { MUA, 0, "(?=(a)+(*COMMIT)b)|ac", "ac" },
701
702 /* (*PRUNE) verb. */
703 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
704 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
705 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
706 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
707 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
708 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
709 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
710 { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
711 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
712 { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
713 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
714 { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
715 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
716
717 /* Deep recursion. */
718 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
719 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
720 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
721
722 /* Deep recursion: Stack limit reached. */
723 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
724 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
725 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
726 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
727 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
728
729 { 0, 0, NULL, NULL }
730 };
731
/* Returns a private heap copy of PCRE's default character tables.

   Handing the library a malloc'ed copy (instead of its built-in tables)
   lets valgrind report invalid reads and writes on the table memory.

   mode != 0: release the cached copy, if any, and return NULL.
   mode == 0: return the cached copy, creating it on first use. NULL is
              returned when the default tables cannot be obtained or the
              allocation fails. */
static const unsigned char *tables(int mode)
{
	/* The table size cannot be queried through pcre_fullinfo, so it is
	   hard-coded; acceptable for a test program. */
	enum { TABLES_LENGTH = 1088 };
	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables = NULL;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode != 0) {
		/* free(NULL) is a no-op, so no guard is needed. */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy == NULL) {
		/* Compile an empty pattern with no custom tables purely to ask
		   the library where its default tables live. */
#if defined SUPPORT_PCRE8
		regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
		if (regex) {
			pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
			pcre_free(regex);
		}
#elif defined SUPPORT_PCRE16
		regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
		if (regex) {
			pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
			pcre16_free(regex);
		}
#elif defined SUPPORT_PCRE32
		regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
		if (regex) {
			pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
			pcre32_free(regex);
		}
#endif
		/* A missing default table pointer should never happen; in that
		   case (or on allocation failure) tables_copy simply stays NULL. */
		if (default_tables != NULL) {
			tables_copy = (unsigned char *)malloc(TABLES_LENGTH);
			if (tables_copy != NULL)
				memcpy(tables_copy, default_tables, TABLES_LENGTH);
		}
	}

	return tables_copy;
}
794
795 #ifdef SUPPORT_PCRE8
796 static pcre_jit_stack* callback8(void *arg)
797 {
798 return (pcre_jit_stack *)arg;
799 }
800 #endif
801
802 #ifdef SUPPORT_PCRE16
803 static pcre16_jit_stack* callback16(void *arg)
804 {
805 return (pcre16_jit_stack *)arg;
806 }
807 #endif
808
809 #ifdef SUPPORT_PCRE32
810 static pcre32_jit_stack* callback32(void *arg)
811 {
812 return (pcre32_jit_stack *)arg;
813 }
814 #endif
815
#ifdef SUPPORT_PCRE8
/* Lazily created JIT stack shared by the 8 bit test runs. */
static pcre_jit_stack *stack8;

/* Returns the shared 8 bit JIT stack, allocating it on first use
   (start size 1, maximum size 1 MiB). The result is NULL if the
   allocation failed. */
static pcre_jit_stack *getstack8(void)
{
	if (stack8 != NULL)
		return stack8;

	stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
	return stack8;
}

/* With a study block: attaches the shared JIT stack to it via callback8.
   With NULL: releases the shared stack so the next user allocates a new one. */
static void setstack8(pcre_extra *extra)
{
	if (extra != NULL) {
		pcre_assign_jit_stack(extra, callback8, getstack8());
		return;
	}

	if (stack8 != NULL) {
		pcre_jit_stack_free(stack8);
		stack8 = NULL;
	}
}
#endif /* SUPPORT_PCRE8 */
838
#ifdef SUPPORT_PCRE16
/* Lazily created JIT stack shared by the 16 bit test runs. */
static pcre16_jit_stack *stack16;

/* Returns the shared 16 bit JIT stack, allocating it on first use
   (start size 1, maximum size 1 MiB). The result is NULL if the
   allocation failed. */
static pcre16_jit_stack *getstack16(void)
{
	if (stack16 != NULL)
		return stack16;

	stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
	return stack16;
}

/* With a study block: attaches the shared JIT stack to it via callback16.
   With NULL: releases the shared stack so the next user allocates a new one. */
static void setstack16(pcre16_extra *extra)
{
	if (extra != NULL) {
		pcre16_assign_jit_stack(extra, callback16, getstack16());
		return;
	}

	if (stack16 != NULL) {
		pcre16_jit_stack_free(stack16);
		stack16 = NULL;
	}
}
#endif /* SUPPORT_PCRE16 */
861
#ifdef SUPPORT_PCRE32
/* Lazily created JIT stack shared by the 32 bit test runs. */
static pcre32_jit_stack *stack32;

/* Returns the shared 32 bit JIT stack, allocating it on first use
   (start size 1, maximum size 1 MiB). The result is NULL if the
   allocation failed. */
static pcre32_jit_stack *getstack32(void)
{
	if (stack32 != NULL)
		return stack32;

	stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
	return stack32;
}

/* With a study block: attaches the shared JIT stack to it via callback32.
   With NULL: releases the shared stack so the next user allocates a new one. */
static void setstack32(pcre32_extra *extra)
{
	if (extra != NULL) {
		pcre32_assign_jit_stack(extra, callback32, getstack32());
		return;
	}

	if (stack32 != NULL) {
		pcre32_jit_stack_free(stack32);
		stack32 = NULL;
	}
}
#endif /* SUPPORT_PCRE32 */
884
885 #ifdef SUPPORT_PCRE16
886
887 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
888 {
889 unsigned char *iptr = (unsigned char*)input;
890 PCRE_UCHAR16 *optr = output;
891 unsigned int c;
892
893 if (max_length == 0)
894 return 0;
895
896 while (*iptr && max_length > 1) {
897 c = 0;
898 if (offsetmap)
899 *offsetmap++ = (int)(iptr - (unsigned char*)input);
900
901 if (!(*iptr & 0x80))
902 c = *iptr++;
903 else if (!(*iptr & 0x20)) {
904 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
905 iptr += 2;
906 } else if (!(*iptr & 0x10)) {
907 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
908 iptr += 3;
909 } else if (!(*iptr & 0x08)) {
910 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
911 iptr += 4;
912 }
913
914 if (c < 65536) {
915 *optr++ = c;
916 max_length--;
917 } else if (max_length <= 2) {
918 *optr = '\0';
919 return (int)(optr - output);
920 } else {
921 c -= 0x10000;
922 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
923 *optr++ = 0xdc00 | (c & 0x3ff);
924 max_length -= 2;
925 if (offsetmap)
926 offsetmap++;
927 }
928 }
929 if (offsetmap)
930 *offsetmap = (int)(iptr - (unsigned char*)input);
931 *optr = '\0';
932 return (int)(optr - output);
933 }
934
935 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
936 {
937 unsigned char *iptr = (unsigned char*)input;
938 PCRE_UCHAR16 *optr = output;
939
940 if (max_length == 0)
941 return 0;
942
943 while (*iptr && max_length > 1) {
944 *optr++ = *iptr++;
945 max_length--;
946 }
947 *optr = '\0';
948 return (int)(optr - output);
949 }
950
951 #define REGTEST_MAX_LENGTH16 4096
952 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
953 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
954
955 #endif /* SUPPORT_PCRE16 */
956
957 #ifdef SUPPORT_PCRE32
958
959 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
960 {
961 unsigned char *iptr = (unsigned char*)input;
962 PCRE_UCHAR32 *optr = output;
963 unsigned int c;
964
965 if (max_length == 0)
966 return 0;
967
968 while (*iptr && max_length > 1) {
969 c = 0;
970 if (offsetmap)
971 *offsetmap++ = (int)(iptr - (unsigned char*)input);
972
973 if (!(*iptr & 0x80))
974 c = *iptr++;
975 else if (!(*iptr & 0x20)) {
976 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
977 iptr += 2;
978 } else if (!(*iptr & 0x10)) {
979 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
980 iptr += 3;
981 } else if (!(*iptr & 0x08)) {
982 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
983 iptr += 4;
984 }
985
986 *optr++ = c;
987 max_length--;
988 }
989 if (offsetmap)
990 *offsetmap = (int)(iptr - (unsigned char*)input);
991 *optr = 0;
992 return (int)(optr - output);
993 }
994
995 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
996 {
997 unsigned char *iptr = (unsigned char*)input;
998 PCRE_UCHAR32 *optr = output;
999
1000 if (max_length == 0)
1001 return 0;
1002
1003 while (*iptr && max_length > 1) {
1004 *optr++ = *iptr++;
1005 max_length--;
1006 }
1007 *optr = '\0';
1008 return (int)(optr - output);
1009 }
1010
1011 #define REGTEST_MAX_LENGTH32 4096
1012 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1013 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1014
1015 #endif /* SUPPORT_PCRE32 */
1016
/* Returns 1 when every byte of the NUL-terminated string is in the 7 bit
   ASCII range (0..127), and 0 as soon as a larger byte is found. An empty
   string counts as ASCII. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	/* Inspect the bytes as unsigned so values above 127 compare correctly
	   regardless of the signedness of plain char. */
	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 128)
			return 0;
	}
	return 1;
}
1027
1028 static int regression_tests(void)
1029 {
1030 struct regression_test_case *current = regression_test_cases;
1031 const char *error;
1032 char *cpu_info;
1033 int i, err_offs;
1034 int is_successful, is_ascii_pattern, is_ascii_input;
1035 int total = 0;
1036 int successful = 0;
1037 int successful_row = 0;
1038 int counter = 0;
1039 int study_mode;
1040 int utf = 0, ucp = 0;
1041 int disabled_flags = 0;
1042 #ifdef SUPPORT_PCRE8
1043 pcre *re8;
1044 pcre_extra *extra8;
1045 pcre_extra dummy_extra8;
1046 int ovector8_1[32];
1047 int ovector8_2[32];
1048 int return_value8[2];
1049 unsigned char *mark8_1, *mark8_2;
1050 #endif
1051 #ifdef SUPPORT_PCRE16
1052 pcre16 *re16;
1053 pcre16_extra *extra16;
1054 pcre16_extra dummy_extra16;
1055 int ovector16_1[32];
1056 int ovector16_2[32];
1057 int return_value16[2];
1058 PCRE_UCHAR16 *mark16_1, *mark16_2;
1059 int length16;
1060 #endif
1061 #ifdef SUPPORT_PCRE32
1062 pcre32 *re32;
1063 pcre32_extra *extra32;
1064 pcre32_extra dummy_extra32;
1065 int ovector32_1[32];
1066 int ovector32_2[32];
1067 int return_value32[2];
1068 PCRE_UCHAR32 *mark32_1, *mark32_2;
1069 int length32;
1070 #endif
1071
1072 /* This test compares the behaviour of interpreter and JIT. Although disabling
1073 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1074 still considered successful from pcre_jit_test point of view. */
1075
1076 #if defined SUPPORT_PCRE8
1077 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1078 #elif defined SUPPORT_PCRE16
1079 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1080 #elif defined SUPPORT_PCRE32
1081 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1082 #endif
1083
1084 printf("Running JIT regression tests\n");
1085 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1086
1087 #if defined SUPPORT_PCRE8
1088 pcre_config(PCRE_CONFIG_UTF8, &utf);
1089 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1090 #elif defined SUPPORT_PCRE16
1091 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1092 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1093 #elif defined SUPPORT_PCRE16
1094 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1095 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1096 #endif
1097
1098 if (!utf)
1099 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1100 if (!ucp)
1101 disabled_flags |= PCRE_UCP;
1102 #ifdef SUPPORT_PCRE8
1103 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1104 #endif
1105 #ifdef SUPPORT_PCRE16
1106 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1107 #endif
1108 #ifdef SUPPORT_PCRE32
1109 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1110 #endif
1111
1112 while (current->pattern) {
1113 /* printf("\nPattern: %s :\n", current->pattern); */
1114 total++;
1115 if (current->start_offset & F_PROPERTY) {
1116 is_ascii_pattern = 0;
1117 is_ascii_input = 0;
1118 } else {
1119 is_ascii_pattern = check_ascii(current->pattern);
1120 is_ascii_input = check_ascii(current->input);
1121 }
1122
1123 if (current->flags & PCRE_PARTIAL_SOFT)
1124 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1125 else if (current->flags & PCRE_PARTIAL_HARD)
1126 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1127 else
1128 study_mode = PCRE_STUDY_JIT_COMPILE;
1129 error = NULL;
1130 #ifdef SUPPORT_PCRE8
1131 re8 = NULL;
1132 if (!(current->start_offset & F_NO8))
1133 re8 = pcre_compile(current->pattern,
1134 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1135 &error, &err_offs, tables(0));
1136
1137 extra8 = NULL;
1138 if (re8) {
1139 error = NULL;
1140 extra8 = pcre_study(re8, study_mode, &error);
1141 if (!extra8) {
1142 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1143 pcre_free(re8);
1144 re8 = NULL;
1145 }
1146 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1147 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1148 pcre_free_study(extra8);
1149 pcre_free(re8);
1150 re8 = NULL;
1151 }
1152 extra8->flags |= PCRE_EXTRA_MARK;
1153 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1154 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1155 #endif
1156 #ifdef SUPPORT_PCRE16
1157 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1158 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1159 else
1160 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1161
1162 re16 = NULL;
1163 if (!(current->start_offset & F_NO16))
1164 re16 = pcre16_compile(regtest_buf16,
1165 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1166 &error, &err_offs, tables(0));
1167
1168 extra16 = NULL;
1169 if (re16) {
1170 error = NULL;
1171 extra16 = pcre16_study(re16, study_mode, &error);
1172 if (!extra16) {
1173 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1174 pcre16_free(re16);
1175 re16 = NULL;
1176 }
1177 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1178 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1179 pcre16_free_study(extra16);
1180 pcre16_free(re16);
1181 re16 = NULL;
1182 }
1183 extra16->flags |= PCRE_EXTRA_MARK;
1184 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1185 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1186 #endif
1187 #ifdef SUPPORT_PCRE32
1188 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1189 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1190 else
1191 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1192
1193 re32 = NULL;
1194 if (!(current->start_offset & F_NO32))
1195 re32 = pcre32_compile(regtest_buf32,
1196 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1197 &error, &err_offs, tables(0));
1198
1199 extra32 = NULL;
1200 if (re32) {
1201 error = NULL;
1202 extra32 = pcre32_study(re32, study_mode, &error);
1203 if (!extra32) {
1204 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1205 pcre32_free(re32);
1206 re32 = NULL;
1207 }
1208 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1209 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1210 pcre32_free_study(extra32);
1211 pcre32_free(re32);
1212 re32 = NULL;
1213 }
1214 extra32->flags |= PCRE_EXTRA_MARK;
1215 } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1216 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1217 #endif
1218
1219 counter++;
1220 if ((counter & 0x3) != 0) {
1221 #ifdef SUPPORT_PCRE8
1222 setstack8(NULL);
1223 #endif
1224 #ifdef SUPPORT_PCRE16
1225 setstack16(NULL);
1226 #endif
1227 #ifdef SUPPORT_PCRE32
1228 setstack32(NULL);
1229 #endif
1230 }
1231
1232 #ifdef SUPPORT_PCRE8
1233 return_value8[0] = -1000;
1234 return_value8[1] = -1000;
1235 for (i = 0; i < 32; ++i)
1236 ovector8_1[i] = -2;
1237 for (i = 0; i < 32; ++i)
1238 ovector8_2[i] = -2;
1239 if (re8) {
1240 mark8_1 = NULL;
1241 mark8_2 = NULL;
1242 extra8->mark = &mark8_1;
1243
1244 if ((counter & 0x1) != 0) {
1245 setstack8(extra8);
1246 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1247 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1248 } else
1249 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1250 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
1251 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1252 dummy_extra8.flags = PCRE_EXTRA_MARK;
1253 if (current->start_offset & F_STUDY) {
1254 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1255 dummy_extra8.study_data = extra8->study_data;
1256 }
1257 dummy_extra8.mark = &mark8_2;
1258 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1259 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1260 }
1261 #endif
1262
1263 #ifdef SUPPORT_PCRE16
1264 return_value16[0] = -1000;
1265 return_value16[1] = -1000;
1266 for (i = 0; i < 32; ++i)
1267 ovector16_1[i] = -2;
1268 for (i = 0; i < 32; ++i)
1269 ovector16_2[i] = -2;
1270 if (re16) {
1271 mark16_1 = NULL;
1272 mark16_2 = NULL;
1273 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1274 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1275 else
1276 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1277 extra16->mark = &mark16_1;
1278 if ((counter & 0x1) != 0) {
1279 setstack16(extra16);
1280 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1281 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1282 } else
1283 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1284 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
1285 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1286 dummy_extra16.flags = PCRE_EXTRA_MARK;
1287 if (current->start_offset & F_STUDY) {
1288 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1289 dummy_extra16.study_data = extra16->study_data;
1290 }
1291 dummy_extra16.mark = &mark16_2;
1292 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1293 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1294 }
1295 #endif
1296
1297 #ifdef SUPPORT_PCRE32
1298 return_value32[0] = -1000;
1299 return_value32[1] = -1000;
1300 for (i = 0; i < 32; ++i)
1301 ovector32_1[i] = -2;
1302 for (i = 0; i < 32; ++i)
1303 ovector32_2[i] = -2;
1304 if (re32) {
1305 mark32_1 = NULL;
1306 mark32_2 = NULL;
1307 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1308 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1309 else
1310 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1311 extra32->mark = &mark32_1;
1312 if ((counter & 0x1) != 0) {
1313 setstack32(extra32);
1314 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1315 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1316 } else
1317 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1318 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
1319 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1320 dummy_extra32.flags = PCRE_EXTRA_MARK;
1321 if (current->start_offset & F_STUDY) {
1322 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1323 dummy_extra32.study_data = extra32->study_data;
1324 }
1325 dummy_extra32.mark = &mark32_2;
1326 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1327 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1328 }
1329 #endif
1330
1331 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1332 return_value8[0], return_value16[0],
1333 ovector8_1[0], ovector8_1[1],
1334 ovector16_1[0], ovector16_1[1],
1335 ovector32_1[0], ovector32_1[1],
1336 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1337
1338 /* If F_DIFF is set, just run the test, but do not compare the results.
1339 Segfaults can still be captured. */
1340
1341 is_successful = 1;
1342 if (!(current->start_offset & F_DIFF)) {
1343 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1344 if (!(current->start_offset & F_FORCECONV)) {
1345 int return_value;
1346
1347 /* All results must be the same. */
1348 #ifdef SUPPORT_PCRE8
1349 if ((return_value = return_value8[0]) != return_value8[1]) {
1350 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1351 return_value8[0], return_value8[1], total, current->pattern, current->input);
1352 is_successful = 0;
1353 } else
1354 #endif
1355 #ifdef SUPPORT_PCRE16
1356 if ((return_value = return_value16[0]) != return_value16[1]) {
1357 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1358 return_value16[0], return_value16[1], total, current->pattern, current->input);
1359 is_successful = 0;
1360 } else
1361 #endif
1362 #ifdef SUPPORT_PCRE32
1363 if ((return_value = return_value32[0]) != return_value32[1]) {
1364 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1365 return_value32[0], return_value32[1], total, current->pattern, current->input);
1366 is_successful = 0;
1367 } else
1368 #endif
1369 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1370 if (return_value8[0] != return_value16[0]) {
1371 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1372 return_value8[0], return_value16[0],
1373 total, current->pattern, current->input);
1374 is_successful = 0;
1375 } else
1376 #endif
1377 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1378 if (return_value8[0] != return_value32[0]) {
1379 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1380 return_value8[0], return_value32[0],
1381 total, current->pattern, current->input);
1382 is_successful = 0;
1383 } else
1384 #endif
1385 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1386 if (return_value16[0] != return_value32[0]) {
1387 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1388 return_value16[0], return_value32[0],
1389 total, current->pattern, current->input);
1390 is_successful = 0;
1391 } else
1392 #endif
1393 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1394 if (return_value == PCRE_ERROR_PARTIAL) {
1395 return_value = 2;
1396 } else {
1397 return_value *= 2;
1398 }
1399 #ifdef SUPPORT_PCRE8
1400 return_value8[0] = return_value;
1401 #endif
1402 #ifdef SUPPORT_PCRE16
1403 return_value16[0] = return_value;
1404 #endif
1405 #ifdef SUPPORT_PCRE32
1406 return_value32[0] = return_value;
1407 #endif
1408 /* Transform back the results. */
1409 if (current->flags & PCRE_UTF8) {
1410 #ifdef SUPPORT_PCRE16
1411 for (i = 0; i < return_value; ++i) {
1412 if (ovector16_1[i] >= 0)
1413 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1414 if (ovector16_2[i] >= 0)
1415 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1416 }
1417 #endif
1418 #ifdef SUPPORT_PCRE32
1419 for (i = 0; i < return_value; ++i) {
1420 if (ovector32_1[i] >= 0)
1421 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1422 if (ovector32_2[i] >= 0)
1423 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1424 }
1425 #endif
1426 }
1427
1428 for (i = 0; i < return_value; ++i) {
1429 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1430 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1431 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1432 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1433 total, current->pattern, current->input);
1434 is_successful = 0;
1435 }
1436 #endif
1437 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1438 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1439 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1440 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1441 total, current->pattern, current->input);
1442 is_successful = 0;
1443 }
1444 #endif
1445 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1446 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1447 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1448 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1449 total, current->pattern, current->input);
1450 is_successful = 0;
1451 }
1452 #endif
1453 }
1454 }
1455 } else
1456 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1457 {
1458 /* Each bit width (8, 16, 32) is only compared with itself here: its two return values and its two ovectors must be equal. */
1459 #ifdef SUPPORT_PCRE8
1460 if (return_value8[0] != return_value8[1]) {
1461 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1462 return_value8[0], return_value8[1], total, current->pattern, current->input);
1463 is_successful = 0;
1464 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1465 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1466 return_value8[0] = 2;
1467 else
1468 return_value8[0] *= 2;
1469
1470 for (i = 0; i < return_value8[0]; ++i)
1471 if (ovector8_1[i] != ovector8_2[i]) {
1472 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1473 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1474 is_successful = 0;
1475 }
1476 }
1477 #endif
1478
1479 #ifdef SUPPORT_PCRE16
1480 if (return_value16[0] != return_value16[1]) {
1481 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1482 return_value16[0], return_value16[1], total, current->pattern, current->input);
1483 is_successful = 0;
1484 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1485 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1486 return_value16[0] = 2;
1487 else
1488 return_value16[0] *= 2;
1489
1490 for (i = 0; i < return_value16[0]; ++i)
1491 if (ovector16_1[i] != ovector16_2[i]) {
1492 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1493 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1494 is_successful = 0;
1495 }
1496 }
1497 #endif
1498
1499 #ifdef SUPPORT_PCRE32
1500 if (return_value32[0] != return_value32[1]) {
1501 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1502 return_value32[0], return_value32[1], total, current->pattern, current->input);
1503 is_successful = 0;
1504 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1505 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1506 return_value32[0] = 2;
1507 else
1508 return_value32[0] *= 2;
1509
1510 for (i = 0; i < return_value32[0]; ++i)
1511 if (ovector32_1[i] != ovector32_2[i]) {
1512 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1513 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1514 is_successful = 0;
1515 }
1516 }
1517 #endif
1518 }
1519 }
1520
1521 if (is_successful) {
1522 #ifdef SUPPORT_PCRE8
1523 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1524 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1525 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1526 total, current->pattern, current->input);
1527 is_successful = 0;
1528 }
1529
1530 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1531 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1532 total, current->pattern, current->input);
1533 is_successful = 0;
1534 }
1535 }
1536 #endif
1537 #ifdef SUPPORT_PCRE16
1538 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1539 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1540 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1541 total, current->pattern, current->input);
1542 is_successful = 0;
1543 }
1544
1545 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1546 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1547 total, current->pattern, current->input);
1548 is_successful = 0;
1549 }
1550 }
1551 #endif
1552 #ifdef SUPPORT_PCRE32
1553 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1554 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1555 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1556 total, current->pattern, current->input);
1557 is_successful = 0;
1558 }
1559
1560 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1561 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1562 total, current->pattern, current->input);
1563 is_successful = 0;
1564 }
1565 }
1566 #endif
1567 }
1568
1569 if (is_successful) {
1570 #ifdef SUPPORT_PCRE8
1571 if (mark8_1 != mark8_2) {
1572 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1573 total, current->pattern, current->input);
1574 is_successful = 0;
1575 }
1576 #endif
1577 #ifdef SUPPORT_PCRE16
1578 if (mark16_1 != mark16_2) {
1579 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1580 total, current->pattern, current->input);
1581 is_successful = 0;
1582 }
1583 #endif
1584 #ifdef SUPPORT_PCRE32
1585 if (mark32_1 != mark32_2) {
1586 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1587 total, current->pattern, current->input);
1588 is_successful = 0;
1589 }
1590 #endif
1591 }
1592
1593 #ifdef SUPPORT_PCRE8
1594 if (re8) {
1595 pcre_free_study(extra8);
1596 pcre_free(re8);
1597 }
1598 #endif
1599 #ifdef SUPPORT_PCRE16
1600 if (re16) {
1601 pcre16_free_study(extra16);
1602 pcre16_free(re16);
1603 }
1604 #endif
1605 #ifdef SUPPORT_PCRE32
1606 if (re32) {
1607 pcre32_free_study(extra32);
1608 pcre32_free(re32);
1609 }
1610 #endif
1611
1612 if (is_successful) {
1613 successful++;
1614 successful_row++;
1615 printf(".");
1616 if (successful_row >= 60) {
1617 successful_row = 0;
1618 printf("\n");
1619 }
1620 } else
1621 successful_row = 0;
1622
1623 fflush(stdout);
1624 current++;
1625 }
1626 tables(1);
1627 #ifdef SUPPORT_PCRE8
1628 setstack8(NULL);
1629 #endif
1630 #ifdef SUPPORT_PCRE16
1631 setstack16(NULL);
1632 #endif
1633 #ifdef SUPPORT_PCRE32
1634 setstack32(NULL);
1635 #endif
1636
1637 if (total == successful) {
1638 printf("\nAll JIT regression tests are successfully passed.\n");
1639 return 0;
1640 } else {
1641 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1642 return 1;
1643 }
1644 }
1645
1646 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5