/[pcre]/code/branches/pcre16/pcre_jit_test.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 804 - (show annotations)
Wed Dec 14 11:18:01 2011 UTC (9 years, 4 months ago) by zherczeg
File MIME type: text/plain
File size: 47211 byte(s)
PUBL macro added, single char optimization is fixed, MAX_255 checks are added, pcre_jit_test now copy the default tables to help valgrind
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51 #define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
/* Runs the regression table; defined later in this file. Its return value is
   passed straight back as the process exit status by main(). */
85 static int regression_tests(void);
86
/* Program entry point.
   Asks pcre_config() whether the linked PCRE library reports JIT support.
   If it does not, prints a message and returns 1; otherwise returns whatever
   regression_tests() returns. */
87 int main(void)
88 {
89 int jit = 0; /* stays 0 unless pcre_config() stores a non-zero value */
90 pcre_config(PCRE_CONFIG_JIT, &jit); /* return value is not checked; jit keeps its 0 default if nothing is stored */
91 if (!jit) {
92 printf("JIT must be enabled to run pcre_jit_test\n");
93 return 1;
94 }
95 return regression_tests();
96 }
97
98 /* --------------------------------------------------------------------------------------- */
99
/* Build-time guard: compilation stops unless at least one of SUPPORT_PCRE8 and
   SUPPORT_PCRE16 is defined. */
100 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
101 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
102 #endif
103
/* Shorthand combinations of PCRE option bits used in the first field of the
   test table. The letters in each name map to the options it combines:
   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
104 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
105 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
106 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
107 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
108 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
109 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
110 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
111
/* Per-test flags. In the table below they are OR-ed into the start_offset
   field, above the low 16 bits covered by OFFSET_MASK.
   NOTE(review): the code that consumes these flags is not visible in this part
   of the file. Judging by the names and the table entries, F_NOMATCH marks
   cases expected not to match and F_NO8 / F_NO16 exclude a case from the 8-bit
   or 16-bit run; F_DIFF and F_FORCECONV should be confirmed against
   regression_tests(). */
112 #define OFFSET_MASK 0x00ffff
113 #define F_NO8 0x010000
114 #define F_NO16 0x020000
115 #define F_NOMATCH 0x040000
116 #define F_DIFF 0x080000
117 #define F_FORCECONV 0x100000
118
/* One row of the regression table: option bits, a start offset (with test
   flags OR-ed in), a pattern and an input string.
   NOTE(review): how each field is consumed is decided by regression_tests(),
   which is not visible here; the field comments describe how the table
   initialises them. */
119 struct regression_test_case {
120 int flags; /* PCRE_* option bits, e.g. MUA, CMA or MUA | PCRE_NOTEMPTY in the table */
121 int start_offset; /* small offset value in the low bits, optionally OR-ed with F_* flags */
122 const char *pattern; /* regular expression text, written as a C string literal */
123 const char *input; /* string paired with the pattern; presumably the subject it is matched against (confirm in regression_tests()) */
124 };
125
126 static struct regression_test_case regression_test_cases[] = {
127 /* Constant strings. */
128 { MUA, 0, "AbC", "AbAbC" },
129 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
130 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
131 { MA, 0, "[^a]", "aAbB" },
132 { CMA, 0, "[^m]", "mMnN" },
133 { MA, 0, "a[^b][^#]", "abacd" },
134 { CMA, 0, "A[^B][^E]", "abacd" },
135 { CMUA, 0, "[^x][^#]", "XxBll" },
136 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
137 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
138 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
139 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
140 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
141 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
142 { MUA, 0, "[axd]", "sAXd" },
143 { CMUA, 0, "[axd]", "sAXd" },
144 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
145 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
146 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
147 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
148 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
149 { MUA, 0, "[^a]", "\xc2\x80[]" },
150 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
151 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
152 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
153 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
154 { PCRE_CASELESS, 0, "a1", "Aa1" },
155 { MA, 0, "\\Ca", "cda" },
156 { CMA, 0, "\\Ca", "CDA" },
157 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
158 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
159 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
160 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
161 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
162 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
163
164 /* Assertions. */
165 { MUA, 0, "\\b[^A]", "A_B#" },
166 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
167 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
168 { MAP, 0, "\\B", "_\xa1" },
169 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
170 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
171 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
172 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
173 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
174 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
175 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
176 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
177 { MA, 1 | F_NOMATCH, "^", "\n" },
178 { 0, 0, "^ab", "ab" },
179 { 0, 0 | F_NOMATCH, "^ab", "aab" },
180 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
181 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
182 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
183 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
184 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
185 { 0, 0, "ab$", "ab" },
186 { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
188 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
190 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
191 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
192 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
193 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
194 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
195 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
196 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
197 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
198 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
199 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
200 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
201 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
202 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
203 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
204 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
205 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
206 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
207 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
208 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
212 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
213 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
214 { MA, 0, "\\Aa", "aaa" },
215 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
216 { MA, 1, "\\Ga", "aaa" },
217 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
218 { MA, 0, "a\\z", "aaa" },
219 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
220
221 /* Brackets. */
222 { MUA, 0, "(ab|bb|cd)", "bacde" },
223 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
224 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
225 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
226 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
227 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
228
229 /* Greedy and non-greedy ? operators. */
230 { MUA, 0, "(?:a)?a", "laab" },
231 { CMUA, 0, "(A)?A", "llaab" },
232 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
233 { MUA, 0, "(a)?a", "manm" },
234 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
235 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
236 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
237
238 /* Greedy and non-greedy + operators */
239 { MUA, 0, "(aa)+aa", "aaaaaaa" },
240 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
241 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
242 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
243 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
244 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
245 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
246
247 /* Greedy and non-greedy * operators */
248 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
249 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
250 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
251 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
252 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
253 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
254 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
255 { MA, 0, "((?:a|)*){0}a", "a" },
256
257 /* Combining ? + * operators */
258 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
259 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
260 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
261 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
262 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
263
264 /* Single character iterators. */
265 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
266 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
267 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
268 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
269 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
270 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
271 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
272 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
273 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
274 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
275 { MUA, 0, "(a?+[^b])+", "babaacacb" },
276 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
277 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
278 { CMUA, 0, "[c-f]+k", "DemmFke" },
279 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
280 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
281 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
282 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
283 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
284 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
285 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
286 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
287 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
288 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
289 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
290 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
291 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
292 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
293 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
294 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
295 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
296 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
297
298 /* Basic character sets. */
299 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
300 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
301 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
302 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
303 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
304 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
305
306 /* Unicode properties. */
307 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
308 { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
309 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
310 { MUAP, 0 | F_NOMATCH, "[\\P{Any}]", "abc" },
311 { MUAP, 0 | F_NOMATCH, "[^\\p{Any}]", "abc" },
312 { MUAP, 0 | F_NOMATCH, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
313 { MUAP, 0 | F_NOMATCH, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
314 { MUAP, 0 | F_NOMATCH, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
315 { MUAP, 0 | F_NOMATCH, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
316 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
317 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
318 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
319 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
320 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
321 { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
322 { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
323 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
324 { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
325 { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
326 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
327
328 /* Possible empty brackets. */
329 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
330 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
331 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
332 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
333 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
334 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
335 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
336 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
337 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
338 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
339
340 /* Start offset. */
341 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
342 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
343 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
344 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
345
346 /* Newline. */
347 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
348 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
349 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
350
351 /* Any character except newline or any newline. */
352 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
353 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
354 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
355 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
356 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
357 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
358 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
359 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
360 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
361 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
362 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
363 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
364 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
365 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
366 { MUA, 0, "\\R+", "ab\r\n\r" },
367 { MUA, 0, "\\R*", "ab\r\n\r" },
368 { MUA, 0, "\\R*", "\r\n\r" },
369 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
370 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
371 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
372 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
373 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
374 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
375 { MUA, 0, "\\R*\\R\\R", "\n\r" },
376 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
377 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
378
379 /* Atomic groups (no fallback from "next" direction). */
380 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
381 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
382 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
383 "bababcdedefgheijijklmlmnop" },
384 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
385 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
386 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
387 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
388 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
389 { MUA, 0, "(?>x|)*$", "aaa" },
390 { MUA, 0, "(?>(x)|)*$", "aaa" },
391 { MUA, 0, "(?>x|())*$", "aaa" },
392 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
393 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
394 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
395 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
396 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
397 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
398 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
399 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
400 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
401 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
402 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
403 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
404 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
405 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
406 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
407 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
408 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
409 { MUA, 0 | F_NOMATCH, "\\X", "\xcc\x8d\xcc\x8d" },
410 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
411 { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
412 { MUA, 0, "\\X{2,4}", "abcdef" },
413 { MUA, 0, "\\X{2,4}?", "abcdef" },
414 { MUA, 0 | F_NOMATCH, "\\X{2,4}..", "#\xcc\x8d##" },
415 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
416 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
417 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
418
419 /* Possessive quantifiers. */
420 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
421 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
422 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
423 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
424 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
425 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
426 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
427 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
428 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
429 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
430 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
431 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
432 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
433 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
434 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
435 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
436 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
437 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
438 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
439 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
440 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
441 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
442 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
443 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
444 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
445 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
446 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
447 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
448 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
449 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
450 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
451 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
452 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
453 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
454 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
455
456 /* Back references. */
457 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
458 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
459 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
460 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
461 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
462 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
463 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
464 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
465 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
466 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
467 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
468 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
469 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
470 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
471 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
472 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
473 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
474 { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
475 { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
476 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
477 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
478 { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
479 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
480
481 /* Assertions. */
482 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
483 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
484 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
485 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
486 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
487 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
488 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
489 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
490 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
491 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
492 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
493 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
494 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
495 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
496 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
497 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
498 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
499 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
500 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
501 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
502 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
503 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
504 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
505 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
506
507 /* Not empty, ACCEPT, FAIL */
508 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
509 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
510 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
511 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
512 { MUA, 0, "a(*ACCEPT)b", "ab" },
513 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
514 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
516 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
517 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
518 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
519 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
520 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
521 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
522 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
523 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
524 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
525 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
526 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
527
528 /* Conditional blocks. */
529 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
530 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
531 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
532 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
533 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
534 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
535 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
536 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
537 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
538 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
539 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
541 { MUA, 0, "(?(?=a)ab)", "a" },
542 { MUA, 0, "(?(?<!b)c)", "b" },
543 { MUA, 0, "(?(DEFINE)a(b))", "a" },
544 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
545 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
546 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
547 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
548 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
549 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
550 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
551 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
552 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
553 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
554 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
555 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
556 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
557 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
560 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
561 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
562 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
563
564 /* Set start of match. */
565 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
566 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
567 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
568 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
569 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
570
571 /* First line. */
572 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
573 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "\\p{Any}a", "bb\r\naaa" },
574 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
575 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
576 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
577 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
578 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
579 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
580 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
581 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
582 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
583 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
584 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
585 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
586 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
587 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "\\p{Any}{4}|a", "\r\na" },
588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
589
590 /* Recurse. */
591 { MUA, 0, "(a)(?1)", "aa" },
592 { MUA, 0, "((a))(?1)", "aa" },
593 { MUA, 0, "(b|a)(?1)", "aa" },
594 { MUA, 0, "(b|(a))(?1)", "aa" },
595 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
596 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
597 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
598 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
599 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
600 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
601 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
602 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
603 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
604 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
605 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
606 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
607 { MUA, 0, "b|<(?R)*>", "<<b>" },
608 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
609 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
610 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
611 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
612 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
613 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
614 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
615 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
616
617 /* 16 bit specific tests. */
618 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
619 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
620 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
621 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
622 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
623 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
624 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
625 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
626 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
627 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
628 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
629 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
630 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
632 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
634 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
635 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
636 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
637 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
638 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
639 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
640 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
641
642 /* Deep recursion. */
643 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
644 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
645 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
646
647 /* Deep recursion: Stack limit reached. */
648 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
649 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
650 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
651 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
652 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
653
654 { 0, 0, NULL, NULL }
655 };
656
657 static const unsigned char *tables(int release)
658 {
659 /* The purpose of this function to allow valgrind
660 for reporting invalid reads and writes. */
661 static unsigned char *tables_copy;
662 pcre *regex;
663 const char *errorptr;
664 int erroroffset;
665 const unsigned char *default_tables;
666 #ifdef SUPPORT_PCRE8
667 char null_str[1] = { 0 };
668 #else
669 PCRE_SCHAR16 null_str[1] = { 0 };
670 #endif
671
672 if (release) {
673 if (tables_copy)
674 free(tables_copy);
675 tables_copy = NULL;
676 return NULL;
677 }
678
679 if (tables_copy)
680 return tables_copy;
681
682 default_tables = NULL;
683 #ifdef SUPPORT_PCRE8
684 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
685 if (regex) {
686 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
687 pcre_free(regex);
688 }
689 #else
690 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
691 if (regex) {
692 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
693 pcre16_free(regex);
694 }
695 #endif
696 /* Shouldn't ever happen. */
697 if (!default_tables)
698 return NULL;
699
700 /* This value cannot get from pcre_fullinfo. Since this is a test program,
701 we can live with it at the moment. */
702 tables_copy = (unsigned char *)malloc(1088);
703 if (!tables_copy)
704 return NULL;
705
706 memcpy(tables_copy, default_tables, 1088);
707 return tables_copy;
708 }
709
710 static pcre_jit_stack* callback(void *arg)
711 {
712 return (pcre_jit_stack *)arg;
713 }
714
715 static void setstack(pcre_extra *extra, int alloc_again)
716 {
717 static pcre_jit_stack *stack;
718
719 if (alloc_again) {
720 if (stack)
721 pcre_jit_stack_free(stack);
722 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
723 }
724 /* Extra can be NULL. */
725 pcre_assign_jit_stack(extra, callback, stack);
726 }
727
728 #ifdef SUPPORT_PCRE16
729
/* Converts a NUL-terminated UTF-8 string into 16-bit code units.

   input:      NUL-terminated byte string.
   output:     destination buffer; always NUL-terminated when max_length > 0.
   offsetmap:  optional (may be NULL). For every output unit index k,
               offsetmap[k] receives the byte offset in 'input' of the
               character that produced unit k (both halves of a surrogate
               pair map to the start of that character). The entry at the
               returned length receives the offset where conversion stopped.
               Must have room for at least max_length entries.
   max_length: capacity of 'output' in 16-bit units, including the terminator.
               Values <= 0 make the function return 0 without writing.

   Returns the number of 16-bit units written, not counting the terminator.

   The decoder is deliberately lenient: it only inspects the lead byte to
   determine the sequence length and does not validate continuation bytes,
   so encoded surrogates (e.g. "\xed\xa0\x80") pass through as single units.
   Two malformed cases are handled explicitly so the function never reads
   past the terminator and always makes progress:
     - a multi-byte sequence cut short by the terminating NUL ends the
       conversion at the start of that sequence;
     - a byte that cannot start any sequence (0xb8-0xbf, 0xf8-0xff) is
       copied through as a single unit. */
static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length)
{
	const unsigned char *iptr = (const unsigned char *)input;
	unsigned short *optr = (unsigned short *)output;
	unsigned int c;
	int seq_length, char_offset, i;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		/* Sequence length according to the lead byte; 0 means "no valid
		   lead byte pattern". */
		if (!(*iptr & 0x80))
			seq_length = 1;
		else if (!(*iptr & 0x20))
			seq_length = 2;
		else if (!(*iptr & 0x10))
			seq_length = 3;
		else if (!(*iptr & 0x08))
			seq_length = 4;
		else
			seq_length = 0;

		/* Stop if the NUL terminator occurs inside the sequence; stepping
		   over it would read beyond the end of the input. */
		for (i = 1; i < seq_length && iptr[i]; i++)
			;
		if (i < seq_length)
			break;

		char_offset = (int)(iptr - (const unsigned char *)input);
		if (offsetmap)
			*offsetmap++ = char_offset;

		switch (seq_length) {
		case 1:
			c = iptr[0];
			break;
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 4:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			/* Not a lead byte: pass it through and consume exactly one
			   byte so the loop is guaranteed to advance. */
			c = iptr[0];
			seq_length = 1;
			break;
		}
		iptr += seq_length;

		if (c < 65536) {
			*optr++ = (unsigned short)c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room for a surrogate pair plus the terminator. */
			*optr = '\0';
			return (int)(optr - (unsigned short *)output);
		} else {
			c -= 0x10000;
			*optr++ = (unsigned short)(0xd800 | ((c >> 10) & 0x3ff));
			*optr++ = (unsigned short)(0xdc00 | (c & 0x3ff));
			max_length -= 2;
			/* The low surrogate belongs to the same input character. */
			if (offsetmap)
				*offsetmap++ = char_offset;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - (const unsigned char *)input);
	*optr = '\0';
	return (int)(optr - (unsigned short *)output);
}
777
/* Widens a NUL-terminated 8-bit string into 16-bit units, one unit per
   byte (each byte is zero-extended; no decoding takes place).

   input:      NUL-terminated byte string.
   output:     destination buffer; always NUL-terminated when max_length > 0.
   max_length: capacity of 'output' in 16-bit units, including the terminator.
               Values <= 0 make the function return 0 without writing.

   Returns the number of units written, not counting the terminator. Input
   that does not fit is silently truncated. */
static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length)
{
	const unsigned char *iptr = (const unsigned char *)input;
	unsigned short *optr = (unsigned short *)output;

	/* A non-positive capacity leaves no room even for the terminator. */
	if (max_length <= 0)
		return 0;

	/* Keep one unit in reserve for the terminator. */
	while (*iptr && max_length > 1) {
		*optr++ = *iptr++;
		max_length--;
	}
	*optr = '\0';
	return (int)(optr - (unsigned short *)output);
}
793
/* Capacity, in elements, of the two 16-bit scratch arrays below. */
#define REGTEST_MAX_LENGTH 4096

/* Receives the 16-bit form of the current pattern and, later, of the
   current subject string. */
static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH];

/* Filled by convert_utf8_to_utf16: maps 16-bit unit offsets in regtest_buf
   back to byte offsets in the original 8-bit subject. */
static int regtest_offsetmap[REGTEST_MAX_LENGTH];
797
798 #endif /* SUPPORT_PCRE16 */
799
800 static int regression_tests(void)
801 {
802 struct regression_test_case *current = regression_test_cases;
803 const char *error;
804 int i, err_offs, is_successful;
805 int total = 0;
806 int successful = 0;
807 int counter = 0;
808 #ifdef SUPPORT_PCRE8
809 pcre *re8;
810 pcre_extra *extra8;
811 int ovector8_1[32];
812 int ovector8_2[32];
813 int return_value8_1, return_value8_2;
814 int utf8 = 0, ucp8 = 0;
815 int disabled_flags8 = 0;
816 #endif
817 #ifdef SUPPORT_PCRE16
818 pcre *re16;
819 pcre_extra *extra16;
820 int ovector16_1[32];
821 int ovector16_2[32];
822 int return_value16_1, return_value16_2;
823 int utf16 = 0, ucp16 = 0;
824 int disabled_flags16 = 0;
825 int length16;
826 #endif
827
828 /* This test compares the behaviour of interpreter and JIT. Although disabling
829 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
830 still considered successful from pcre_jit_test point of view. */
831
832 printf("Running JIT regression\n");
833
834 #ifdef SUPPORT_PCRE8
835 pcre_config(PCRE_CONFIG_UTF8, &utf8);
836 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
837 if (!utf8)
838 disabled_flags8 |= PCRE_UTF8;
839 if (!ucp8)
840 disabled_flags8 |= PCRE_UCP;
841 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
842 #endif
843 #ifdef SUPPORT_PCRE16
844 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
845 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
846 if (!utf16)
847 disabled_flags16 |= PCRE_UTF8;
848 if (!ucp16)
849 disabled_flags16 |= PCRE_UCP;
850 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
851 #endif
852
853 while (current->pattern) {
854 /* printf("\nPattern: %s :\n", current->pattern); */
855 total++;
856
857 error = NULL;
858 #ifdef SUPPORT_PCRE8
859 re8 = NULL;
860 if (!(current->start_offset & F_NO8))
861 re8 = pcre_compile(current->pattern,
862 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
863 &error, &err_offs, tables(0));
864
865 extra8 = NULL;
866 if (re8) {
867 error = NULL;
868 extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
869 if (!extra8) {
870 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
871 pcre_free(re8);
872 re8 = NULL;
873 }
874 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
875 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
876 pcre_free_study(extra8);
877 pcre_free(re8);
878 re8 = NULL;
879 }
880 } else if (utf8 && ucp8 && !(current->start_offset & F_NO8))
881 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
882 #endif
883 #ifdef SUPPORT_PCRE16
884 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
885 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
886 else
887 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
888
889 re16 = NULL;
890 if (!(current->start_offset & F_NO16))
891 re16 = pcre16_compile(regtest_buf,
892 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
893 &error, &err_offs, tables(0));
894
895 extra16 = NULL;
896 if (re16) {
897 error = NULL;
898 extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
899 if (!extra16) {
900 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
901 pcre16_free(re16);
902 re16 = NULL;
903 }
904 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
905 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
906 pcre16_free_study(extra16);
907 pcre16_free(re16);
908 re16 = NULL;
909 }
910 } else if (utf16 && ucp16 && !(current->start_offset & F_NO16))
911 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
912 #endif
913
914 counter++;
915 if ((counter & 0x3) != 0)
916 setstack(NULL, 1);
917
918 #ifdef SUPPORT_PCRE8
919 return_value8_1 = -1000;
920 return_value8_2 = -1000;
921 for (i = 0; i < 32; ++i)
922 ovector8_1[i] = -2;
923 for (i = 0; i < 32; ++i)
924 ovector8_2[i] = -2;
925 if (re8) {
926 setstack(extra8, 0);
927 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
928 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
929 return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
930 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
931 }
932 #endif
933
934 #ifdef SUPPORT_PCRE16
935 return_value16_1 = -1000;
936 return_value16_2 = -1000;
937 for (i = 0; i < 32; ++i)
938 ovector16_1[i] = -2;
939 for (i = 0; i < 32; ++i)
940 ovector16_2[i] = -2;
941 if (re16) {
942 setstack(extra16, 0);
943 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
944 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
945 else
946 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
947 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
948 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
949 return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
950 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
951 }
952 #endif
953
954 /* If F_DIFF is set, just run the test, but do not compare the results.
955 Segfaults can still be captured. */
956
957 is_successful = 1;
958 if (!(current->start_offset & F_DIFF)) {
959 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
960 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
961 /* All results must be the same. */
962 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
963 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
964 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
965 total, current->pattern, current->input);
966 is_successful = 0;
967 } else if (return_value8_1 >= 0) {
968 return_value8_1 *= 2;
969 /* Transform back the results. */
970 if (current->flags & PCRE_UTF8) {
971 for (i = 0; i < return_value8_1; ++i) {
972 if (ovector16_1[i] >= 0)
973 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
974 if (ovector16_2[i] >= 0)
975 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
976 }
977 }
978
979 for (i = 0; i < return_value8_1; ++i)
980 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
981 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
982 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
983 total, current->pattern, current->input);
984 is_successful = 0;
985 }
986 }
987 } else {
988 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
989 /* Only the 8 bit and 16 bit results must be equal. */
990 #ifdef SUPPORT_PCRE8
991 if (return_value8_1 != return_value8_2) {
992 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
993 return_value8_1, return_value8_2, total, current->pattern, current->input);
994 is_successful = 0;
995 } else if (return_value8_1 >= 0) {
996 return_value8_1 *= 2;
997 for (i = 0; i < return_value8_1; ++i)
998 if (ovector8_1[i] != ovector8_2[i]) {
999 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1000 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1001 is_successful = 0;
1002 }
1003 }
1004 #endif
1005
1006 #ifdef SUPPORT_PCRE16
1007 if (return_value16_1 != return_value16_2) {
1008 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1009 return_value16_1, return_value16_2, total, current->pattern, current->input);
1010 is_successful = 0;
1011 } else if (return_value16_1 >= 0) {
1012 return_value16_1 *= 2;
1013 for (i = 0; i < return_value16_1; ++i)
1014 if (ovector16_1[i] != ovector16_2[i]) {
1015 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1016 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1017 is_successful = 0;
1018 }
1019 }
1020 #endif
1021
1022 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1023 }
1024 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1025 }
1026
1027 if (is_successful) {
1028 #ifdef SUPPORT_PCRE8
1029 if (!(current->start_offset & F_NO8)) {
1030 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1031 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1032 total, current->pattern, current->input);
1033 is_successful = 0;
1034 }
1035
1036 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1037 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1038 total, current->pattern, current->input);
1039 is_successful = 0;
1040 }
1041 }
1042 #endif
1043 #ifdef SUPPORT_PCRE16
1044 if (!(current->start_offset & F_NO16)) {
1045 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1046 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1047 total, current->pattern, current->input);
1048 is_successful = 0;
1049 }
1050
1051 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1052 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1053 total, current->pattern, current->input);
1054 is_successful = 0;
1055 }
1056 }
1057 #endif
1058 }
1059
1060 if (is_successful)
1061 successful++;
1062
1063 #ifdef SUPPORT_PCRE8
1064 if (re8) {
1065 pcre_free_study(extra8);
1066 pcre_free(re8);
1067 }
1068 #endif
1069 #ifdef SUPPORT_PCRE16
1070 if (re16) {
1071 pcre16_free_study(extra16);
1072 pcre16_free(re16);
1073 }
1074 #endif
1075
1076 /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1077 printf(".");
1078 fflush(stdout);
1079 current++;
1080 }
1081 tables(1);
1082
1083 if (total == successful) {
1084 printf("\nAll JIT regression tests are successfully passed.\n");
1085 return 0;
1086 } else {
1087 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1088 return 1;
1089 }
1090 }
1091
1092 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5