/[pcre]/code/branches/pcre16/pcre_jit_test.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 806 - (show annotations)
Thu Dec 15 11:57:39 2011 UTC (8 years, 2 months ago) by zherczeg
File MIME type: text/plain
File size: 47439 byte(s)
lcc and inline printint.c fixes
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51 #define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
85 static int regression_tests(void);
86
87 int main(void)
88 {
89 int jit = 0;
90 pcre_config(PCRE_CONFIG_JIT, &jit);
91 if (!jit) {
92 printf("JIT must be enabled to run pcre_jit_test\n");
93 return 1;
94 }
95 return regression_tests();
96 }
97
98 /* --------------------------------------------------------------------------------------- */
99
100 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
101 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
102 #endif
103
/* Shorthand PCRE option combinations used in the test table.
   Each letter of a name stands for one option bit:
     C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
     A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(CMUA | PCRE_UCP)
111
/* Bit layout of the second field of a test case: the low 16 bits
   (OFFSET_MASK) carry a numeric value, and the F_* bits above them are
   per-test flags that the table ORs in (e.g. "1 | F_NOMATCH").
   NOTE(review): the exact meaning of each F_* flag is decided by the test
   driver, which is not visible here; the names suggest "skip 8 bit",
   "skip 16 bit", "no match expected", "results may differ" and "force
   conversion" - confirm against the driver. */
#define OFFSET_MASK	0xffff
#define F_NO8		(1 << 16)
#define F_NO16		(1 << 17)
#define F_NOMATCH	(1 << 18)
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
118
/* One row of the regression test table. The table uses positional
   initializers, so the order of the members must not change. */
struct regression_test_case {
	/* PCRE option bits for this case (the table passes PCRE_* values or
	   the shorthand combinations such as MUA / CMA here). */
	int flags;

	/* Start offset in the bits covered by OFFSET_MASK, optionally
	   combined with F_* test flags (e.g. "1 | F_NOMATCH"). */
	int start_offset;

	/* Regular expression source, as a C string. */
	const char *pattern;

	/* Subject string the pattern is run against. */
	const char *input;
};
125
126 static struct regression_test_case regression_test_cases[] = {
127 /* Constant strings. */
128 { MUA, 0, "AbC", "AbAbC" },
129 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
130 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
131 { MA, 0, "[^a]", "aAbB" },
132 { CMA, 0, "[^m]", "mMnN" },
133 { MA, 0, "a[^b][^#]", "abacd" },
134 { CMA, 0, "A[^B][^E]", "abacd" },
135 { CMUA, 0, "[^x][^#]", "XxBll" },
136 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
137 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
138 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
139 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
140 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
141 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
142 { MUA, 0, "[axd]", "sAXd" },
143 { CMUA, 0, "[axd]", "sAXd" },
144 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
145 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
146 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
147 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
148 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
149 { MUA, 0, "[^a]", "\xc2\x80[]" },
150 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
151 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
152 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
153 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
154 { PCRE_CASELESS, 0, "a1", "Aa1" },
155 { MA, 0, "\\Ca", "cda" },
156 { CMA, 0, "\\Ca", "CDA" },
157 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
158 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
159 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
160 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
161 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
162 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
163
164 /* Assertions. */
165 { MUA, 0, "\\b[^A]", "A_B#" },
166 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
167 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
168 { MAP, 0, "\\B", "_\xa1" },
169 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
170 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
171 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
172 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
173 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
174 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
175 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
176 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
177 { MA, 1 | F_NOMATCH, "^", "\n" },
178 { 0, 0, "^ab", "ab" },
179 { 0, 0 | F_NOMATCH, "^ab", "aab" },
180 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
181 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
182 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
183 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
184 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
185 { 0, 0, "ab$", "ab" },
186 { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
188 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
190 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
191 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
192 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
193 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
194 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
195 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
196 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
197 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
198 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
199 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
200 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
201 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
202 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
203 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
204 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
205 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
206 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
207 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
208 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
212 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
213 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
214 { MA, 0, "\\Aa", "aaa" },
215 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
216 { MA, 1, "\\Ga", "aaa" },
217 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
218 { MA, 0, "a\\z", "aaa" },
219 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
220
221 /* Brackets. */
222 { MUA, 0, "(ab|bb|cd)", "bacde" },
223 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
224 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
225 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
226 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
227 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
228
229 /* Greedy and non-greedy ? operators. */
230 { MUA, 0, "(?:a)?a", "laab" },
231 { CMUA, 0, "(A)?A", "llaab" },
232 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
233 { MUA, 0, "(a)?a", "manm" },
234 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
235 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
236 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
237
238 /* Greedy and non-greedy + operators */
239 { MUA, 0, "(aa)+aa", "aaaaaaa" },
240 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
241 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
242 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
243 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
244 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
245 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
246
247 /* Greedy and non-greedy * operators */
248 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
249 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
250 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
251 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
252 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
253 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
254 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
255 { MA, 0, "((?:a|)*){0}a", "a" },
256
257 /* Combining ? + * operators */
258 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
259 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
260 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
261 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
262 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
263
264 /* Single character iterators. */
265 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
266 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
267 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
268 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
269 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
270 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
271 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
272 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
273 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
274 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
275 { MUA, 0, "(a?+[^b])+", "babaacacb" },
276 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
277 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
278 { CMUA, 0, "[c-f]+k", "DemmFke" },
279 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
280 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
281 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
282 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
283 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
284 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
285 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
286 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
287 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
288 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
289 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
290 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
291 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
292 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
293 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
294 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
295 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
296 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
297
298 /* Basic character sets. */
299 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
300 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
301 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
302 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
303 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
304 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
305
306 /* Unicode properties. */
307 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
308 { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
309 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
310 { MUAP, 0 | F_NOMATCH, "[\\P{Any}]", "abc" },
311 { MUAP, 0 | F_NOMATCH, "[^\\p{Any}]", "abc" },
312 { MUAP, 0 | F_NOMATCH, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
313 { MUAP, 0 | F_NOMATCH, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
314 { MUAP, 0 | F_NOMATCH, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
315 { MUAP, 0 | F_NOMATCH, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
316 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
317 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
318 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
319 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
320 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
321 { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
322 { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
323 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
324 { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
325 { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
326 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
327
328 /* Possible empty brackets. */
329 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
330 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
331 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
332 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
333 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
334 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
335 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
336 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
337 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
338 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
339
340 /* Start offset. */
341 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
342 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
343 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
344 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
345
346 /* Newline. */
347 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
348 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
349 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
350
351 /* Any character except newline or any newline. */
352 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
353 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
354 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
355 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
356 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
357 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
358 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
359 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
360 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
361 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
362 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
363 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
364 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
365 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
366 { MUA, 0, "\\R+", "ab\r\n\r" },
367 { MUA, 0, "\\R*", "ab\r\n\r" },
368 { MUA, 0, "\\R*", "\r\n\r" },
369 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
370 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
371 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
372 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
373 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
374 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
375 { MUA, 0, "\\R*\\R\\R", "\n\r" },
376 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
377 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
378
379 /* Atomic groups (no fallback from "next" direction). */
380 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
381 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
382 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
383 "bababcdedefgheijijklmlmnop" },
384 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
385 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
386 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
387 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
388 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
389 { MUA, 0, "(?>x|)*$", "aaa" },
390 { MUA, 0, "(?>(x)|)*$", "aaa" },
391 { MUA, 0, "(?>x|())*$", "aaa" },
392 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
393 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
394 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
395 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
396 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
397 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
398 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
399 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
400 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
401 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
402 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
403 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
404 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
405 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
406 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
407 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
408 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
409 { MUA, 0 | F_NOMATCH, "\\X", "\xcc\x8d\xcc\x8d" },
410 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
411 { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
412 { MUA, 0, "\\X{2,4}", "abcdef" },
413 { MUA, 0, "\\X{2,4}?", "abcdef" },
414 { MUA, 0 | F_NOMATCH, "\\X{2,4}..", "#\xcc\x8d##" },
415 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
416 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
417 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
418
419 /* Possessive quantifiers. */
420 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
421 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
422 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
423 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
424 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
425 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
426 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
427 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
428 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
429 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
430 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
431 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
432 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
433 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
434 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
435 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
436 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
437 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
438 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
439 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
440 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
441 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
442 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
443 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
444 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
445 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
446 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
447 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
448 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
449 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
450 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
451 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
452 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
453 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
454 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
455
456 /* Back references. */
457 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
458 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
459 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
460 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
461 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
462 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
463 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
464 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
465 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
466 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
467 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
468 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
469 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
470 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
471 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
472 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
473 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
474 { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
475 { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
476 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
477 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
478 { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
479 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
480
481 /* Assertions. */
482 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
483 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
484 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
485 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
486 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
487 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
488 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
489 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
490 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
491 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
492 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
493 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
494 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
495 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
496 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
497 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
498 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
499 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
500 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
501 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
502 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
503 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
504 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
505 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
506
507 /* Not empty, ACCEPT, FAIL */
508 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
509 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
510 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
511 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
512 { MUA, 0, "a(*ACCEPT)b", "ab" },
513 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
514 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
516 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
517 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
518 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
519 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
520 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
521 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
522 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
523 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
524 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
525 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
526 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
527
528 /* Conditional blocks. */
529 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
530 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
531 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
532 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
533 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
534 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
535 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
536 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
537 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
538 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
539 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
541 { MUA, 0, "(?(?=a)ab)", "a" },
542 { MUA, 0, "(?(?<!b)c)", "b" },
543 { MUA, 0, "(?(DEFINE)a(b))", "a" },
544 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
545 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
546 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
547 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
548 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
549 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
550 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
551 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
552 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
553 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
554 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
555 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
556 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
557 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
560 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
561 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
562 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
563
564 /* Set start of match. */
565 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
566 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
567 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
568 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
569 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
570
571 /* First line. */
572 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
573 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "\\p{Any}a", "bb\r\naaa" },
574 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
575 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
576 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
577 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
578 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
579 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
580 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
581 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
582 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
583 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
584 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
585 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
586 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
587 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "\\p{Any}{4}|a", "\r\na" },
588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
589
590 /* Recurse. */
591 { MUA, 0, "(a)(?1)", "aa" },
592 { MUA, 0, "((a))(?1)", "aa" },
593 { MUA, 0, "(b|a)(?1)", "aa" },
594 { MUA, 0, "(b|(a))(?1)", "aa" },
595 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
596 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
597 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
598 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
599 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
600 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
601 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
602 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
603 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
604 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
605 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
606 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
607 { MUA, 0, "b|<(?R)*>", "<<b>" },
608 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
609 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
610 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
611 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
612 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
613 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
614 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
615 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
616
617 /* 16 bit specific tests. */
618 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
619 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
620 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
621 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
622 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
623 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
624 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
625 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
626 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
627 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
628 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
629 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
630 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
632 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
634 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
635 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
636 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
637 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
638 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
639 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
640 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
641 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
642 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
643
644 /* Deep recursion. */
645 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
646 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
647 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
648
649 /* Deep recursion: Stack limit reached. */
650 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
651 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
652 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
653 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
654 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
655
656 { 0, 0, NULL, NULL }
657 };
658
659 static const unsigned char *tables(int mode)
660 {
661 /* The purpose of this function to allow valgrind
662 for reporting invalid reads and writes. */
663 static unsigned char *tables_copy;
664 pcre *regex;
665 const char *errorptr;
666 int erroroffset;
667 const unsigned char *default_tables;
668 #ifdef SUPPORT_PCRE8
669 char null_str[1] = { 0 };
670 #else
671 PCRE_SCHAR16 null_str[1] = { 0 };
672 #endif
673
674 if (mode) {
675 if (tables_copy)
676 free(tables_copy);
677 tables_copy = NULL;
678 return NULL;
679 }
680
681 if (tables_copy)
682 return tables_copy;
683
684 default_tables = NULL;
685 #ifdef SUPPORT_PCRE8
686 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
687 if (regex) {
688 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
689 pcre_free(regex);
690 }
691 #else
692 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
693 if (regex) {
694 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
695 pcre16_free(regex);
696 }
697 #endif
698 /* Shouldn't ever happen. */
699 if (!default_tables)
700 return NULL;
701
702 /* Unfortunately this value cannot get from pcre_fullinfo.
703 Since this is a test program, this is acceptable at the moment. */
704 tables_copy = (unsigned char *)malloc(1088);
705 if (!tables_copy)
706 return NULL;
707
708 memcpy(tables_copy, default_tables, 1088);
709 return tables_copy;
710 }
711
712 static pcre_jit_stack* callback(void *arg)
713 {
714 return (pcre_jit_stack *)arg;
715 }
716
717 static void setstack(pcre_extra *extra)
718 {
719 static pcre_jit_stack *stack;
720
721 if (!extra) {
722 if (stack)
723 pcre_jit_stack_free(stack);
724 stack = NULL;
725 return;
726 }
727
728 if (!stack)
729 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
730 /* Extra can be NULL. */
731 pcre_assign_jit_stack(extra, callback, stack);
732 }
733
734 #ifdef SUPPORT_PCRE16
735
736 static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length)
737 {
738 unsigned char *iptr = (unsigned char*)input;
739 unsigned short *optr = (unsigned short *)output;
740 unsigned int c;
741
742 if (max_length == 0)
743 return 0;
744
745 while (*iptr && max_length > 1) {
746 c = 0;
747 if (offsetmap)
748 *offsetmap++ = (int)(iptr - (unsigned char*)input);
749
750 if (!(*iptr & 0x80))
751 c = *iptr++;
752 else if (!(*iptr & 0x20)) {
753 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
754 iptr += 2;
755 } else if (!(*iptr & 0x10)) {
756 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
757 iptr += 3;
758 } else if (!(*iptr & 0x08)) {
759 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
760 iptr += 4;
761 }
762
763 if (c < 65536) {
764 *optr++ = c;
765 max_length--;
766 } else if (max_length <= 2) {
767 *optr = '\0';
768 return (int)(optr - (unsigned short *)output);
769 } else {
770 c -= 0x10000;
771 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
772 *optr++ = 0xdc00 | (c & 0x3ff);
773 max_length -= 2;
774 if (offsetmap)
775 offsetmap++;
776 }
777 }
778 if (offsetmap)
779 *offsetmap = (int)(iptr - (unsigned char*)input);
780 *optr = '\0';
781 return (int)(optr - (unsigned short *)output);
782 }
783
784 static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length)
785 {
786 unsigned char *iptr = (unsigned char*)input;
787 unsigned short *optr = (unsigned short *)output;
788
789 if (max_length == 0)
790 return 0;
791
792 while (*iptr && max_length > 1) {
793 *optr++ = *iptr++;
794 max_length--;
795 }
796 *optr = '\0';
797 return (int)(optr - (unsigned short *)output);
798 }
799
/* Capacity, in elements, of the two scratch arrays below. It is passed as
   max_length to the 8 -> 16 bit conversion helpers. */
800 #define REGTEST_MAX_LENGTH 4096
/* Scratch buffer for 16 bit mode: regression_tests() first converts the
   pattern into it, then reuses it for the subject string. */
801 static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
/* Filled by convert_utf8_to_utf16() for the subject string; used by
   regression_tests() to translate 16 bit ovector offsets back to byte
   offsets of the 8 bit subject. */
802 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
803
804 #endif /* SUPPORT_PCRE16 */
805
806 static int regression_tests(void)
807 {
808 struct regression_test_case *current = regression_test_cases;
809 const char *error;
810 int i, err_offs, is_successful;
811 int total = 0;
812 int successful = 0;
813 int counter = 0;
814 #ifdef SUPPORT_PCRE8
815 pcre *re8;
816 pcre_extra *extra8;
817 int ovector8_1[32];
818 int ovector8_2[32];
819 int return_value8_1, return_value8_2;
820 int utf8 = 0, ucp8 = 0;
821 int disabled_flags8 = 0;
822 #endif
823 #ifdef SUPPORT_PCRE16
824 pcre *re16;
825 pcre_extra *extra16;
826 int ovector16_1[32];
827 int ovector16_2[32];
828 int return_value16_1, return_value16_2;
829 int utf16 = 0, ucp16 = 0;
830 int disabled_flags16 = 0;
831 int length16;
832 #endif
833
834 /* This test compares the behaviour of interpreter and JIT. Although disabling
835 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
836 still considered successful from pcre_jit_test point of view. */
837
838 printf("Running JIT regression\n");
839
840 #ifdef SUPPORT_PCRE8
841 pcre_config(PCRE_CONFIG_UTF8, &utf8);
842 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
843 if (!utf8)
844 disabled_flags8 |= PCRE_UTF8;
845 if (!ucp8)
846 disabled_flags8 |= PCRE_UCP;
847 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
848 #endif
849 #ifdef SUPPORT_PCRE16
850 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
851 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
852 if (!utf16)
853 disabled_flags16 |= PCRE_UTF8;
854 if (!ucp16)
855 disabled_flags16 |= PCRE_UCP;
856 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
857 #endif
858
859 while (current->pattern) {
860 /* printf("\nPattern: %s :\n", current->pattern); */
861 total++;
862
863 error = NULL;
864 #ifdef SUPPORT_PCRE8
865 re8 = NULL;
866 if (!(current->start_offset & F_NO8))
867 re8 = pcre_compile(current->pattern,
868 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
869 &error, &err_offs, tables(0));
870
871 extra8 = NULL;
872 if (re8) {
873 error = NULL;
874 extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
875 if (!extra8) {
876 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
877 pcre_free(re8);
878 re8 = NULL;
879 }
880 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
881 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
882 pcre_free_study(extra8);
883 pcre_free(re8);
884 re8 = NULL;
885 }
886 } else if (utf8 && ucp8 && !(current->start_offset & F_NO8))
887 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
888 #endif
889 #ifdef SUPPORT_PCRE16
890 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
891 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
892 else
893 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
894
895 re16 = NULL;
896 if (!(current->start_offset & F_NO16))
897 re16 = pcre16_compile(regtest_buf,
898 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
899 &error, &err_offs, tables(0));
900
901 extra16 = NULL;
902 if (re16) {
903 error = NULL;
904 extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
905 if (!extra16) {
906 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
907 pcre16_free(re16);
908 re16 = NULL;
909 }
910 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
911 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
912 pcre16_free_study(extra16);
913 pcre16_free(re16);
914 re16 = NULL;
915 }
916 } else if (utf16 && ucp16 && !(current->start_offset & F_NO16))
917 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
918 #endif
919
920 counter++;
921 if ((counter & 0x3) != 0)
922 setstack(NULL);
923
924 #ifdef SUPPORT_PCRE8
925 return_value8_1 = -1000;
926 return_value8_2 = -1000;
927 for (i = 0; i < 32; ++i)
928 ovector8_1[i] = -2;
929 for (i = 0; i < 32; ++i)
930 ovector8_2[i] = -2;
931 if (re8) {
932 setstack(extra8);
933 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
934 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
935 return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
936 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
937 }
938 #endif
939
940 #ifdef SUPPORT_PCRE16
941 return_value16_1 = -1000;
942 return_value16_2 = -1000;
943 for (i = 0; i < 32; ++i)
944 ovector16_1[i] = -2;
945 for (i = 0; i < 32; ++i)
946 ovector16_2[i] = -2;
947 if (re16) {
948 setstack(extra16);
949 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
950 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
951 else
952 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
953 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
954 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
955 return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
956 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
957 }
958 #endif
959
960 /* If F_DIFF is set, just run the test, but do not compare the results.
961 Segfaults can still be captured. */
962
963 is_successful = 1;
964 if (!(current->start_offset & F_DIFF)) {
965 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
966 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
967 /* All results must be the same. */
968 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
969 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
970 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
971 total, current->pattern, current->input);
972 is_successful = 0;
973 } else if (return_value8_1 >= 0) {
974 return_value8_1 *= 2;
975 /* Transform back the results. */
976 if (current->flags & PCRE_UTF8) {
977 for (i = 0; i < return_value8_1; ++i) {
978 if (ovector16_1[i] >= 0)
979 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
980 if (ovector16_2[i] >= 0)
981 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
982 }
983 }
984
985 for (i = 0; i < return_value8_1; ++i)
986 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
987 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
988 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
989 total, current->pattern, current->input);
990 is_successful = 0;
991 }
992 }
993 } else {
994 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
995 /* Only the 8 bit and 16 bit results must be equal. */
996 #ifdef SUPPORT_PCRE8
997 if (return_value8_1 != return_value8_2) {
998 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
999 return_value8_1, return_value8_2, total, current->pattern, current->input);
1000 is_successful = 0;
1001 } else if (return_value8_1 >= 0) {
1002 return_value8_1 *= 2;
1003 for (i = 0; i < return_value8_1; ++i)
1004 if (ovector8_1[i] != ovector8_2[i]) {
1005 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1006 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1007 is_successful = 0;
1008 }
1009 }
1010 #endif
1011
1012 #ifdef SUPPORT_PCRE16
1013 if (return_value16_1 != return_value16_2) {
1014 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1015 return_value16_1, return_value16_2, total, current->pattern, current->input);
1016 is_successful = 0;
1017 } else if (return_value16_1 >= 0) {
1018 return_value16_1 *= 2;
1019 for (i = 0; i < return_value16_1; ++i)
1020 if (ovector16_1[i] != ovector16_2[i]) {
1021 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1022 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1023 is_successful = 0;
1024 }
1025 }
1026 #endif
1027
1028 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1029 }
1030 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1031 }
1032
1033 if (is_successful) {
1034 #ifdef SUPPORT_PCRE8
1035 if (!(current->start_offset & F_NO8)) {
1036 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1037 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1038 total, current->pattern, current->input);
1039 is_successful = 0;
1040 }
1041
1042 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1043 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1044 total, current->pattern, current->input);
1045 is_successful = 0;
1046 }
1047 }
1048 #endif
1049 #ifdef SUPPORT_PCRE16
1050 if (!(current->start_offset & F_NO16)) {
1051 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1052 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1053 total, current->pattern, current->input);
1054 is_successful = 0;
1055 }
1056
1057 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1058 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1059 total, current->pattern, current->input);
1060 is_successful = 0;
1061 }
1062 }
1063 #endif
1064 }
1065
1066 if (is_successful)
1067 successful++;
1068
1069 #ifdef SUPPORT_PCRE8
1070 if (re8) {
1071 pcre_free_study(extra8);
1072 pcre_free(re8);
1073 }
1074 #endif
1075 #ifdef SUPPORT_PCRE16
1076 if (re16) {
1077 pcre16_free_study(extra16);
1078 pcre16_free(re16);
1079 }
1080 #endif
1081
1082 /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1083 printf(".");
1084 fflush(stdout);
1085 current++;
1086 }
1087 tables(1);
1088 setstack(NULL);
1089
1090 if (total == successful) {
1091 printf("\nAll JIT regression tests are successfully passed.\n");
1092 return 0;
1093 } else {
1094 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1095 return 1;
1096 }
1097 }
1098
1099 /* End of pcre_jit_test.c */

  ViewVC Help
Powered by ViewVC 1.1.5