/[pcre]/code/branches/pcre16/pcre_jit_test.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 798 - (show annotations)
Sun Dec 11 18:07:25 2011 UTC (9 years, 4 months ago) by zherczeg
File MIME type: text/plain
File size: 45334 byte(s)
Optimization fixes for ranges contains only a single character
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51 #define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
85 static int regression_tests(void);
86
87 int main(void)
88 {
89 int jit = 0;
90 pcre_config(PCRE_CONFIG_JIT, &jit);
91 if (!jit) {
92 printf("JIT must be enabled to run pcre_jit_test\n");
93 return 1;
94 }
95 return regression_tests();
96 }
97
98 /* --------------------------------------------------------------------------------------- */
99
100 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
101 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
102 #endif
103
/* Shorthand PCRE option sets used in the first column of the test table.
   Each letter in a name stands for one option:
     C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
     A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
   MA is the common base; the other sets add options on top of it. */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(PCRE_CASELESS | MUAP)

/* Bits that the test table ORs into the start_offset member of
   struct regression_test_case. OFFSET_MASK covers the low 16 bits, where
   the numeric start offset is written; every F_* flag lies above that mask.
   NOTE(review): judging by the names and the table entries, F_NO8 / F_NO16
   exclude a case from the 8-bit / 16-bit run, F_NOMATCH marks a case that is
   expected not to match, F_DIFF tolerates differing results and F_FORCECONV
   forces input conversion -- confirm against regression_tests(). */
#define OFFSET_MASK	0x00ffff
#define F_NO8		0x010000
#define F_NO16		0x020000
#define F_NOMATCH	0x040000
#define F_DIFF		0x080000
#define F_FORCECONV	0x100000
118
/* One row of the regression test table.

   Rows are written as positional initializers in the order
   { flags, start_offset, pattern, input }, so the member order must not
   change. */
struct regression_test_case {
	/* PCRE_* option bits for this case, usually one of the shorthand
	   sets such as MUA or CMA, optionally OR-ed with further options. */
	int flags;

	/* Start offset in the low bits (see OFFSET_MASK), optionally OR-ed
	   with F_* test flags such as F_NOMATCH in the higher bits. */
	int start_offset;

	/* Regular expression, as a C string literal. */
	const char *pattern;

	/* Subject string the pattern is run against. */
	const char *input;
};
125
126 static struct regression_test_case regression_test_cases[] = {
127 /* Constant strings. */
128 { MUA, 0, "AbC", "AbAbC" },
129 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
130 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
131 { MA, 0, "[^a]", "aAbB" },
132 { CMA, 0, "[^m]", "mMnN" },
133 { MA, 0, "a[^b][^#]", "abacd" },
134 { CMA, 0, "A[^B][^E]", "abacd" },
135 { CMUA, 0, "[^x][^#]", "XxBll" },
136 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
137 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
138 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
139 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
140 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
141 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
142 { MUA, 0, "[axd]", "sAXd" },
143 { CMUA, 0, "[axd]", "sAXd" },
144 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
145 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
146 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
147 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
148 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
149 { MUA, 0, "[^a]", "\xc2\x80[]" },
150 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
151 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
152 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
153 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
154 { PCRE_CASELESS, 0, "a1", "Aa1" },
155 { MA, 0, "\\Ca", "cda" },
156 { CMA, 0, "\\Ca", "CDA" },
157 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
158 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
159 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
160 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
161 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
162 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
163
164 /* Assertions. */
165 { MUA, 0, "\\b[^A]", "A_B#" },
166 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
167 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
168 { MAP, 0, "\\B", "_\xa1" },
169 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
170 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
171 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
172 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
173 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
174 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
175 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
176 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
177 { MA, 1 | F_NOMATCH, "^", "\n" },
178 { 0, 0, "^ab", "ab" },
179 { 0, 0 | F_NOMATCH, "^ab", "aab" },
180 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
181 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
182 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
183 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
184 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
185 { 0, 0, "ab$", "ab" },
186 { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
188 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
190 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
191 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
192 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
193 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
194 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
195 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
196 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
197 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
198 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
199 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
200 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
201 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
202 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
203 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
204 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
205 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
206 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
207 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
208 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
212 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
213 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
214 { MA, 0, "\\Aa", "aaa" },
215 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
216 { MA, 1, "\\Ga", "aaa" },
217 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
218 { MA, 0, "a\\z", "aaa" },
219 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
220
221 /* Brackets. */
222 { MUA, 0, "(ab|bb|cd)", "bacde" },
223 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
224 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
225 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
226 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
227 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
228
229 /* Greedy and non-greedy ? operators. */
230 { MUA, 0, "(?:a)?a", "laab" },
231 { CMUA, 0, "(A)?A", "llaab" },
232 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
233 { MUA, 0, "(a)?a", "manm" },
234 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
235 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
236 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
237
238 /* Greedy and non-greedy + operators */
239 { MUA, 0, "(aa)+aa", "aaaaaaa" },
240 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
241 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
242 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
243 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
244 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
245 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
246
247 /* Greedy and non-greedy * operators */
248 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
249 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
250 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
251 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
252 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
253 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
254 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
255 { MA, 0, "((?:a|)*){0}a", "a" },
256
257 /* Combining ? + * operators */
258 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
259 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
260 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
261 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
262 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
263
264 /* Single character iterators. */
265 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
266 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
267 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
268 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
269 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
270 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
271 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
272 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
273 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
274 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
275 { MUA, 0, "(a?+[^b])+", "babaacacb" },
276 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
277 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
278 { CMUA, 0, "[c-f]+k", "DemmFke" },
279 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
280 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
281 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
282 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
283 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
284 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
285 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
286 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
287 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
288 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
289 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
290 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
291 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
292 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
293 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
294 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
295 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
296 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
297
298 /* Basic character sets. */
299 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
300 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
301 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
302 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
303 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
304 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
305
306 /* Unicode properties. */
307 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
308 { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
309 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
310 { MUAP, 0 | F_NOMATCH, "[\\P{Any}]", "abc" },
311 { MUAP, 0 | F_NOMATCH, "[^\\p{Any}]", "abc" },
312 { MUAP, 0 | F_NOMATCH, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
313 { MUAP, 0 | F_NOMATCH, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
314 { MUAP, 0 | F_NOMATCH, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
315 { MUAP, 0 | F_NOMATCH, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
316 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
317 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
318 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
319 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
320 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
321 { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
322 { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
323 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
324 { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
325 { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
326 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
327
328 /* Possible empty brackets. */
329 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
330 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
331 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
332 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
333 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
334 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
335 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
336 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
337 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
338 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
339
340 /* Start offset. */
341 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
342 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
343 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
344 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
345
346 /* Newline. */
347 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
348 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
349 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
350
351 /* Any character except newline or any newline. */
352 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
353 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
354 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
355 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
356 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
357 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
358 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
359 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
360 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
361 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
362 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
363 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
364 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
365 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
366 { MUA, 0, "\\R+", "ab\r\n\r" },
367 { MUA, 0, "\\R*", "ab\r\n\r" },
368 { MUA, 0, "\\R*", "\r\n\r" },
369 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
370 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
371 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
372 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
373 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
374 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
375 { MUA, 0, "\\R*\\R\\R", "\n\r" },
376 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
377 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
378
379 /* Atomic groups (no fallback from "next" direction). */
380 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
381 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
382 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
383 "bababcdedefgheijijklmlmnop" },
384 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
385 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
386 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
387 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
388 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
389 { MUA, 0, "(?>x|)*$", "aaa" },
390 { MUA, 0, "(?>(x)|)*$", "aaa" },
391 { MUA, 0, "(?>x|())*$", "aaa" },
392 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
393 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
394 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
395 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
396 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
397 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
398 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
399 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
400 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
401 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
402 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
403 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
404 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
405 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
406 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
407 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
408 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
409 { MUA, 0 | F_NOMATCH, "\\X", "\xcc\x8d\xcc\x8d" },
410 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
411 { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
412 { MUA, 0, "\\X{2,4}", "abcdef" },
413 { MUA, 0, "\\X{2,4}?", "abcdef" },
414 { MUA, 0 | F_NOMATCH, "\\X{2,4}..", "#\xcc\x8d##" },
415 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
416 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
417 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
418
419 /* Possessive quantifiers. */
420 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
421 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
422 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
423 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
424 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
425 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
426 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
427 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
428 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
429 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
430 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
431 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
432 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
433 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
434 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
435 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
436 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
437 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
438 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
439 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
440 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
441 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
442 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
443 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
444 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
445 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
446 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
447 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
448 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
449 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
450 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
451 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
452 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
453 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
454 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
455
456 /* Back references. */
457 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
458 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
459 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
460 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
461 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
462 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
463 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
464 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
465 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
466 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
467 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
468 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
469 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
470 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
471 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
472 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
473 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
474 { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
475 { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
476 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
477 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
478 { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
479 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
480
481 /* Assertions. */
482 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
483 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
484 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
485 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
486 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
487 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
488 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
489 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
490 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
491 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
492 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
493 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
494 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
495 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
496 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
497 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
498 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
499 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
500 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
501 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
502 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
503 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
504 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
505 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
506
507 /* Not empty, ACCEPT, FAIL */
508 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
509 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
510 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
511 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
512 { MUA, 0, "a(*ACCEPT)b", "ab" },
513 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
514 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
516 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
517 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
518 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
519 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
520 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
521 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
522 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
523 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
524 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
525 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
526 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
527
528 /* Conditional blocks. */
529 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
530 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
531 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
532 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
533 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
534 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
535 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
536 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
537 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
538 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
539 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
541 { MUA, 0, "(?(?=a)ab)", "a" },
542 { MUA, 0, "(?(?<!b)c)", "b" },
543 { MUA, 0, "(?(DEFINE)a(b))", "a" },
544 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
545 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
546 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
547 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
548 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
549 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
550 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
551 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
552 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
553 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
554 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
555 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
556 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
557 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
560 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
561 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
562 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
563
564 /* Set start of match. */
565 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
566 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
567 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
568 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
569 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
570
571 /* First line. */
572 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
573 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "\\p{Any}a", "bb\r\naaa" },
574 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
575 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
576 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
577 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
578 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
579 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
580 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
581 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
582 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
583 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
584 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
585 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
586 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
587 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "\\p{Any}{4}|a", "\r\na" },
588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
589
590 /* Recurse. */
591 { MUA, 0, "(a)(?1)", "aa" },
592 { MUA, 0, "((a))(?1)", "aa" },
593 { MUA, 0, "(b|a)(?1)", "aa" },
594 { MUA, 0, "(b|(a))(?1)", "aa" },
595 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
596 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
597 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
598 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
599 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
600 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
601 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
602 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
603 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
604 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
605 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
606 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
607 { MUA, 0, "b|<(?R)*>", "<<b>" },
608 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
609 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
610 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
611 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
612 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
613 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
614 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
615 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
616
617 /* 16 bit specific tests. */
618 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
619 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
620 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
621 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
622 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
623 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
624 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
625 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
626 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
627 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
628 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
629 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
630 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
632 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
634 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
635 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
636
637 /* Deep recursion. */
638 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
639 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
640 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
641
642 /* Deep recursion: Stack limit reached. */
643 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
644 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
645 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
646 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
647 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
648
649 { 0, 0, NULL, NULL }
650 };
651
652 static pcre_jit_stack* callback(void *arg)
653 {
654 return (pcre_jit_stack *)arg;
655 }
656
657 static void setstack(pcre_extra *extra, int alloc_again)
658 {
659 static pcre_jit_stack *stack;
660
661 if (alloc_again) {
662 if (stack)
663 pcre_jit_stack_free(stack);
664 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
665 }
666 /* Extra can be NULL. */
667 pcre_assign_jit_stack(extra, callback, stack);
668 }
669
670 #ifdef SUPPORT_PCRE16
671
672 static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length)
673 {
674 unsigned char *iptr = (unsigned char*)input;
675 unsigned short *optr = (unsigned short *)output;
676 unsigned int c;
677
678 if (max_length == 0)
679 return 0;
680
681 while (*iptr && max_length > 1) {
682 c = 0;
683 if (offsetmap)
684 *offsetmap++ = (int)(iptr - (unsigned char*)input);
685
686 if (!(*iptr & 0x80))
687 c = *iptr++;
688 else if (!(*iptr & 0x20)) {
689 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
690 iptr += 2;
691 } else if (!(*iptr & 0x10)) {
692 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
693 iptr += 3;
694 } else if (!(*iptr & 0x08)) {
695 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
696 iptr += 4;
697 }
698
699 if (c < 65536) {
700 *optr++ = c;
701 max_length--;
702 } else if (max_length <= 2) {
703 *optr = '\0';
704 return (int)(optr - (unsigned short *)output);
705 } else {
706 c -= 0x10000;
707 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
708 *optr++ = 0xdc00 | (c & 0x3ff);
709 max_length -= 2;
710 if (offsetmap)
711 offsetmap++;
712 }
713 }
714 if (offsetmap)
715 *offsetmap = (int)(iptr - (unsigned char*)input);
716 *optr = '\0';
717 return (int)(optr - (unsigned short *)output);
718 }
719
720 static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length)
721 {
722 unsigned char *iptr = (unsigned char*)input;
723 unsigned short *optr = (unsigned short *)output;
724
725 if (max_length == 0)
726 return 0;
727
728 while (*iptr && max_length > 1) {
729 *optr++ = *iptr++;
730 max_length--;
731 }
732 *optr = '\0';
733 return (int)(optr - (unsigned short *)output);
734 }
735
/* Capacity, in 16-bit units, of the scratch buffer below; also the number of
   entries in the offset map. */
736 #define REGTEST_MAX_LENGTH 4096
/* Scratch buffer that receives the 16-bit form of a pattern or of a subject
   string before it is handed to the 16 bit library. */
737 static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
/* Filled by convert_utf8_to_utf16(): entry i is the byte offset in the UTF-8
   source at which 16-bit unit i of regtest_buf starts. Used to translate
   16 bit match offsets back to 8 bit offsets. */
738 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
739
740 #endif /* SUPPORT_PCRE16 */
741
742 static int regression_tests(void)
743 {
744 struct regression_test_case *current = regression_test_cases;
745 const char *error;
746 int i, err_offs, is_successful;
747 int total = 0;
748 int successful = 0;
749 int counter = 0;
750 #ifdef SUPPORT_PCRE8
751 pcre *re8;
752 pcre_extra *extra8;
753 int ovector8_1[32];
754 int ovector8_2[32];
755 int return_value8_1, return_value8_2;
756 int utf8 = 0, ucp8 = 0;
757 int disabled_flags8 = 0;
758 #endif
759 #ifdef SUPPORT_PCRE16
760 pcre *re16;
761 pcre_extra *extra16;
762 int ovector16_1[32];
763 int ovector16_2[32];
764 int return_value16_1, return_value16_2;
765 int utf16 = 0, ucp16 = 0;
766 int disabled_flags16 = 0;
767 int length16;
768 #endif
769
770 /* This test compares the behaviour of interpreter and JIT. Although disabling
771 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
772 still considered successful from pcre_jit_test point of view. */
773
774 printf("Running JIT regression\n");
775
776 #ifdef SUPPORT_PCRE8
777 pcre_config(PCRE_CONFIG_UTF8, &utf8);
778 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
779 if (!utf8)
780 disabled_flags8 |= PCRE_UTF8;
781 if (!ucp8)
782 disabled_flags8 |= PCRE_UCP;
783 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
784 #endif
785 #ifdef SUPPORT_PCRE16
786 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
787 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
788 if (!utf16)
789 disabled_flags16 |= PCRE_UTF8;
790 if (!ucp16)
791 disabled_flags16 |= PCRE_UCP;
792 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
793 #endif
794
795 while (current->pattern) {
796 /* printf("\nPattern: %s :\n", current->pattern); */
797 total++;
798
799 error = NULL;
800 #ifdef SUPPORT_PCRE8
801 re8 = NULL;
802 if (!(current->start_offset & F_NO8))
803 re8 = pcre_compile(current->pattern,
804 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
805 &error, &err_offs, NULL);
806
807 extra8 = NULL;
808 if (re8) {
809 error = NULL;
810 extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
811 if (!extra8) {
812 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
813 pcre_free(re8);
814 re8 = NULL;
815 }
816 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
817 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
818 pcre_free_study(extra8);
819 pcre_free(re8);
820 re8 = NULL;
821 }
822 } else if (utf8 && ucp8 && !(current->start_offset & F_NO8))
823 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
824 #endif
825 #ifdef SUPPORT_PCRE16
826 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
827 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
828 else
829 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
830
831 re16 = NULL;
832 if (!(current->start_offset & F_NO16))
833 re16 = pcre16_compile(regtest_buf,
834 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
835 &error, &err_offs, NULL);
836
837 extra16 = NULL;
838 if (re16) {
839 error = NULL;
840 extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
841 if (!extra16) {
842 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
843 pcre_free(re16);
844 re16 = NULL;
845 }
846 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
847 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
848 pcre_free_study(extra16);
849 pcre_free(re16);
850 re16 = NULL;
851 }
852 } else if (utf16 && ucp16 && !(current->start_offset & F_NO16))
853 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
854 #endif
855
856 counter++;
857 if ((counter & 0x3) != 0)
858 setstack(NULL, 1);
859
860 #ifdef SUPPORT_PCRE8
861 return_value8_1 = -1000;
862 return_value8_2 = -1000;
863 for (i = 0; i < 32; ++i)
864 ovector8_1[i] = -2;
865 for (i = 0; i < 32; ++i)
866 ovector8_2[i] = -2;
867 if (re8) {
868 setstack(extra8, 0);
869 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
870 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
871 return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
872 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
873 }
874 #endif
875
876 #ifdef SUPPORT_PCRE16
877 return_value16_1 = -1000;
878 return_value16_2 = -1000;
879 for (i = 0; i < 32; ++i)
880 ovector16_1[i] = -2;
881 for (i = 0; i < 32; ++i)
882 ovector16_2[i] = -2;
883 if (re16) {
884 setstack(extra16, 0);
885 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
886 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
887 else
888 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
889 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
890 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
891 return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
892 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
893 }
894 #endif
895
896 /* If F_DIFF is set, just run the test, but do not compare the results.
897 Segfaults can still be captured. */
898
899 is_successful = 1;
900 if (!(current->start_offset & F_DIFF)) {
901 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
902 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
903 /* All results must be the same. */
904 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
905 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
906 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
907 total, current->pattern, current->input);
908 is_successful = 0;
909 } else if (return_value8_1 >= 0) {
910 return_value8_1 *= 2;
911 /* Transform back the results. */
912 if (current->flags & PCRE_UTF8) {
913 for (i = 0; i < return_value8_1; ++i) {
914 if (ovector16_1[i] >= 0)
915 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
916 if (ovector16_2[i] >= 0)
917 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
918 }
919 }
920
921 for (i = 0; i < return_value8_1; ++i)
922 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
923 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
924 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
925 total, current->pattern, current->input);
926 is_successful = 0;
927 }
928 }
929 } else {
930 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
931 /* Only the 8 bit and 16 bit results must be equal. */
932 #ifdef SUPPORT_PCRE8
933 if (return_value8_1 != return_value8_2) {
934 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
935 return_value8_1, return_value8_2, total, current->pattern, current->input);
936 is_successful = 0;
937 } else if (return_value8_1 >= 0) {
938 return_value8_1 *= 2;
939 for (i = 0; i < return_value8_1; ++i)
940 if (ovector8_1[i] != ovector8_2[i]) {
941 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
942 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
943 is_successful = 0;
944 }
945 }
946 #endif
947
948 #ifdef SUPPORT_PCRE16
949 if (return_value16_1 != return_value16_2) {
950 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
951 return_value16_1, return_value16_2, total, current->pattern, current->input);
952 is_successful = 0;
953 } else if (return_value16_1 >= 0) {
954 return_value16_1 *= 2;
955 for (i = 0; i < return_value16_1; ++i)
956 if (ovector16_1[i] != ovector16_2[i]) {
957 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
958 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
959 is_successful = 0;
960 }
961 }
962 #endif
963
964 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
965 }
966 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
967 }
968
969 if (is_successful) {
970 #ifdef SUPPORT_PCRE8
971 if (!(current->start_offset & F_NO8)) {
972 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
973 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
974 total, current->pattern, current->input);
975 is_successful = 0;
976 }
977
978 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
979 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
980 total, current->pattern, current->input);
981 is_successful = 0;
982 }
983 }
984 #endif
985 #ifdef SUPPORT_PCRE16
986 if (!(current->start_offset & F_NO16)) {
987 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
988 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
989 total, current->pattern, current->input);
990 is_successful = 0;
991 }
992
993 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
994 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
995 total, current->pattern, current->input);
996 is_successful = 0;
997 }
998 }
999 #endif
1000 }
1001
1002 if (is_successful)
1003 successful++;
1004
1005 #ifdef SUPPORT_PCRE8
1006 if (re8) {
1007 pcre_free_study(extra8);
1008 pcre_free(re8);
1009 }
1010 #endif
1011 #ifdef SUPPORT_PCRE16
1012 if (re16) {
1013 pcre16_free_study(extra16);
1014 pcre_free(re16);
1015 }
1016 #endif
1017
1018 /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1019 printf(".");
1020 fflush(stdout);
1021 current++;
1022 }
1023
1024 if (total == successful) {
1025 printf("\nAll JIT regression tests are successfully passed.\n");
1026 return 0;
1027 } else {
1028 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1029 return 1;
1030 }
1031 }
1032
1033 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5