1 |
/-- These tests for Unicode property support test PCRE's API and show some of
|
2 |
the compiled code. They are not Perl-compatible. --/
|
3 |
|
4 |
/[\p{L}]/DZ
|
5 |
|
6 |
/[\p{^L}]/DZ
|
7 |
|
8 |
/[\P{L}]/DZ
|
9 |
|
10 |
/[\P{^L}]/DZ
|
11 |
|
12 |
/[abc\p{L}\x{0660}]/8DZ
|
13 |
|
14 |
/[\p{Nd}]/8DZ
|
15 |
1234
|
16 |
|
17 |
/[\p{Nd}+-]+/8DZ
|
18 |
1234
|
19 |
12-34
|
20 |
12+\x{661}-34
|
21 |
** Failers
|
22 |
abcd
|
23 |
|
24 |
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ
|
25 |
|
26 |
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ
|
27 |
|
28 |
/AB\x{1fb0}/8DZ
|
29 |
|
30 |
/AB\x{1fb0}/8DZi
|
31 |
|
32 |
/[\x{105}-\x{109}]/8iDZ
|
33 |
\x{104}
|
34 |
\x{105}
|
35 |
\x{109}
|
36 |
** Failers
|
37 |
\x{100}
|
38 |
\x{10a}
|
39 |
|
40 |
/[z-\x{100}]/8iDZ
|
41 |
Z
|
42 |
z
|
43 |
\x{39c}
|
44 |
\x{178}
|
45 |
|
|
46 |
\x{80}
|
47 |
\x{ff}
|
48 |
\x{100}
|
49 |
\x{101}
|
50 |
** Failers
|
51 |
\x{102}
|
52 |
Y
|
53 |
y
|
54 |
|
55 |
/[z-\x{100}]/8DZi
|
56 |
|
57 |
/(?:[\PPa*]*){8,}/
|
58 |
|
59 |
/[\P{Any}]/BZ
|
60 |
|
61 |
/[\P{Any}\E]/BZ
|
62 |
|
63 |
/(\P{Yi}+\277)/
|
64 |
|
65 |
/(\P{Yi}+\277)?/
|
66 |
|
67 |
/(?<=\P{Yi}{3}A)X/
|
68 |
|
69 |
/\p{Yi}+(\P{Yi}+)(?1)/
|
70 |
|
71 |
/(\P{Yi}{2}\277)?/
|
72 |
|
73 |
/[\P{Yi}A]/
|
74 |
|
75 |
/[\P{Yi}\P{Yi}\P{Yi}A]/
|
76 |
|
77 |
/[^\P{Yi}A]/
|
78 |
|
79 |
/[^\P{Yi}\P{Yi}\P{Yi}A]/
|
80 |
|
81 |
/(\P{Yi}*\277)*/
|
82 |
|
83 |
/(\P{Yi}*?\277)*/
|
84 |
|
85 |
/(\p{Yi}*+\277)*/
|
86 |
|
87 |
/(\P{Yi}?\277)*/
|
88 |
|
89 |
/(\P{Yi}??\277)*/
|
90 |
|
91 |
/(\p{Yi}?+\277)*/
|
92 |
|
93 |
/(\P{Yi}{0,3}\277)*/
|
94 |
|
95 |
/(\P{Yi}{0,3}?\277)*/
|
96 |
|
97 |
/(\p{Yi}{0,3}+\277)*/
|
98 |
|
99 |
/\p{Zl}{2,3}+/8BZ
|
100 |
\xe2\x80\xa8\xe2\x80\xa8
|
101 |
\x{2028}\x{2028}\x{2028}
|
102 |
|
103 |
/\p{Zl}/8BZ
|
104 |
|
105 |
/\p{Lu}{3}+/8BZ
|
106 |
|
107 |
/\pL{2}+/8BZ
|
108 |
|
109 |
/\p{Cc}{2}+/8BZ
|
110 |
|
111 |
/^\p{Cs}/8
|
112 |
\?\x{dfff}
|
113 |
** Failers
|
114 |
\x{09f}
|
115 |
|
116 |
/^\p{Sc}+/8
|
117 |
$\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
|
118 |
\x{9f2}
|
119 |
** Failers
|
120 |
X
|
121 |
\x{2c2}
|
122 |
|
123 |
/^\p{Zs}/8
|
124 |
\ \
|
125 |
\x{a0}
|
126 |
\x{1680}
|
127 |
\x{180e}
|
128 |
\x{2000}
|
129 |
\x{2001}
|
130 |
** Failers
|
131 |
\x{2028}
|
132 |
\x{200d}
|
133 |
|
134 |
/-- These four are here rather than in test 6 because Perl has problems with
|
135 |
the negative versions of the properties. --/
|
136 |
|
137 |
/\p{^Lu}/8i
|
138 |
1234
|
139 |
** Failers
|
140 |
ABC
|
141 |
|
142 |
/\P{Lu}/8i
|
143 |
1234
|
144 |
** Failers
|
145 |
ABC
|
146 |
|
147 |
/\p{Ll}/8i
|
148 |
a
|
149 |
Az
|
150 |
** Failers
|
151 |
ABC
|
152 |
|
153 |
/\p{Lu}/8i
|
154 |
A
|
155 |
a\x{10a0}B
|
156 |
** Failers
|
157 |
a
|
158 |
\x{1d00}
|
159 |
|
160 |
/[\x{c0}\x{391}]/8i
|
161 |
\x{c0}
|
162 |
\x{e0}
|
163 |
|
164 |
/-- The next two are special cases where the lengths of the different cases of
|
165 |
the same character differ. The first went wrong with heap frame storage; the
|
166 |
second was broken in all cases. --/
|
167 |
|
168 |
/^\x{023a}+?(\x{0130}+)/8i
|
169 |
\x{023a}\x{2c65}\x{0130}
|
170 |
|
171 |
/^\x{023a}+([^X])/8i
|
172 |
\x{023a}\x{2c65}X
|
173 |
|
174 |
/\x{c0}+\x{116}+/8i
|
175 |
\x{c0}\x{e0}\x{116}\x{117}
|
176 |
|
177 |
/[\x{c0}\x{116}]+/8i
|
178 |
\x{c0}\x{e0}\x{116}\x{117}
|
179 |
|
180 |
/(\x{de})\1/8i
|
181 |
\x{de}\x{de}
|
182 |
\x{de}\x{fe}
|
183 |
\x{fe}\x{fe}
|
184 |
\x{fe}\x{de}
|
185 |
|
186 |
/^\x{c0}$/8i
|
187 |
\x{c0}
|
188 |
\x{e0}
|
189 |
|
190 |
/^\x{e0}$/8i
|
191 |
\x{c0}
|
192 |
\x{e0}
|
193 |
|
194 |
/-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
|
195 |
will match it only with UCP support, because without that it has no notion
|
196 |
of case for anything other than the ASCII letters. --/
|
197 |
|
198 |
/((?i)[\x{c0}])/8
|
199 |
\x{c0}
|
200 |
\x{e0}
|
201 |
|
202 |
/(?i:[\x{c0}])/8
|
203 |
\x{c0}
|
204 |
\x{e0}
|
205 |
|
206 |
/-- This should be Perl-compatible but Perl 5.11 gets \x{300} wrong. --/8
|
207 |
|
208 |
/^\X/8
|
209 |
A
|
210 |
A\x{300}BC
|
211 |
A\x{300}\x{301}\x{302}BC
|
212 |
*** Failers
|
213 |
\x{300}
|
214 |
|
215 |
/-- These are PCRE's extra properties to help with Unicodizing \d etc. --/
|
216 |
|
217 |
/^\p{Xan}/8
|
218 |
ABCD
|
219 |
1234
|
220 |
\x{6ca}
|
221 |
\x{a6c}
|
222 |
\x{10a7}
|
223 |
** Failers
|
224 |
_ABC
|
225 |
|
226 |
/^\p{Xan}+/8
|
227 |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
228 |
** Failers
|
229 |
_ABC
|
230 |
|
231 |
/^\p{Xan}+?/8
|
232 |
\x{6ca}\x{a6c}\x{10a7}_
|
233 |
|
234 |
/^\p{Xan}*/8
|
235 |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
236 |
|
237 |
/^\p{Xan}{2,9}/8
|
238 |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
239 |
|
240 |
/^\p{Xan}{2,9}?/8
|
241 |
\x{6ca}\x{a6c}\x{10a7}_
|
242 |
|
243 |
/^[\p{Xan}]/8
|
244 |
ABCD1234_
|
245 |
1234abcd_
|
246 |
\x{6ca}
|
247 |
\x{a6c}
|
248 |
\x{10a7}
|
249 |
** Failers
|
250 |
_ABC
|
251 |
|
252 |
/^[\p{Xan}]+/8
|
253 |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
254 |
** Failers
|
255 |
_ABC
|
256 |
|
257 |
/^>\p{Xsp}/8
|
258 |
>\x{1680}\x{2028}\x{0b}
|
259 |
>\x{a0}
|
260 |
** Failers
|
261 |
\x{0b}
|
262 |
|
263 |
/^>\p{Xsp}+/8
|
264 |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
265 |
|
266 |
/^>\p{Xsp}+?/8
|
267 |
>\x{1680}\x{2028}\x{0b}
|
268 |
|
269 |
/^>\p{Xsp}*/8
|
270 |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
271 |
|
272 |
/^>\p{Xsp}{2,9}/8
|
273 |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
274 |
|
275 |
/^>\p{Xsp}{2,9}?/8
|
276 |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
277 |
|
278 |
/^>[\p{Xsp}]/8
|
279 |
>\x{2028}\x{0b}
|
280 |
|
281 |
/^>[\p{Xsp}]+/8
|
282 |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
283 |
|
284 |
/^>\p{Xps}/8
|
285 |
>\x{1680}\x{2028}\x{0b}
|
286 |
>\x{a0}
|
287 |
** Failers
|
288 |
\x{0b}
|
289 |
|
290 |
/^>\p{Xps}+/8
|
291 |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
292 |
|
293 |
/^>\p{Xps}+?/8
|
294 |
>\x{1680}\x{2028}\x{0b}
|
295 |
|
296 |
/^>\p{Xps}*/8
|
297 |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
298 |
|
299 |
/^>\p{Xps}{2,9}/8
|
300 |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
301 |
|
302 |
/^>\p{Xps}{2,9}?/8
|
303 |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
304 |
|
305 |
/^>[\p{Xps}]/8
|
306 |
>\x{2028}\x{0b}
|
307 |
|
308 |
/^>[\p{Xps}]+/8
|
309 |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
310 |
|
311 |
/^\p{Xwd}/8
|
312 |
ABCD
|
313 |
1234
|
314 |
\x{6ca}
|
315 |
\x{a6c}
|
316 |
\x{10a7}
|
317 |
_ABC
|
318 |
** Failers
|
319 |
[]
|
320 |
|
321 |
/^\p{Xwd}+/8
|
322 |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
323 |
|
324 |
/^\p{Xwd}+?/8
|
325 |
\x{6ca}\x{a6c}\x{10a7}_
|
326 |
|
327 |
/^\p{Xwd}*/8
|
328 |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
329 |
|
330 |
/^\p{Xwd}{2,9}/8
|
331 |
A_B12\x{6ca}\x{a6c}\x{10a7}
|
332 |
|
333 |
/^\p{Xwd}{2,9}?/8
|
334 |
\x{6ca}\x{a6c}\x{10a7}_
|
335 |
|
336 |
/^[\p{Xwd}]/8
|
337 |
ABCD1234_
|
338 |
1234abcd_
|
339 |
\x{6ca}
|
340 |
\x{a6c}
|
341 |
\x{10a7}
|
342 |
_ABC
|
343 |
** Failers
|
344 |
[]
|
345 |
|
346 |
/^[\p{Xwd}]+/8
|
347 |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
348 |
|
349 |
/-- A check not in UTF-8 mode --/
|
350 |
|
351 |
/^[\p{Xwd}]+/
|
352 |
ABCD1234_
|
353 |
|
354 |
/-- Some negative checks --/
|
355 |
|
356 |
/^[\P{Xwd}]+/8
|
357 |
!.+\x{019}\x{35a}AB
|
358 |
|
359 |
/^[\p{^Xwd}]+/8
|
360 |
!.+\x{019}\x{35a}AB
|
361 |
|
362 |
/[\D]/WBZ8
|
363 |
1\x{3c8}2
|
364 |
|
365 |
/[\d]/WBZ8
|
366 |
>\x{6f4}<
|
367 |
|
368 |
/[\S]/WBZ8
|
369 |
\x{1680}\x{6f4}\x{1680}
|
370 |
|
371 |
/[\s]/WBZ8
|
372 |
>\x{1680}<
|
373 |
|
374 |
/[\W]/WBZ8
|
375 |
A\x{1712}B
|
376 |
|
377 |
/[\w]/WBZ8
|
378 |
>\x{1723}<
|
379 |
|
380 |
/\D/WBZ8
|
381 |
1\x{3c8}2
|
382 |
|
383 |
/\d/WBZ8
|
384 |
>\x{6f4}<
|
385 |
|
386 |
/\S/WBZ8
|
387 |
\x{1680}\x{6f4}\x{1680}
|
388 |
|
389 |
/\s/WBZ8
|
390 |
>\x{1680}>
|
391 |
|
392 |
/\W/WBZ8
|
393 |
A\x{1712}B
|
394 |
|
395 |
/\w/WBZ8
|
396 |
>\x{1723}<
|
397 |
|
398 |
/[[:alpha:]]/WBZ
|
399 |
|
400 |
/[[:lower:]]/WBZ
|
401 |
|
402 |
/[[:upper:]]/WBZ
|
403 |
|
404 |
/[[:alnum:]]/WBZ
|
405 |
|
406 |
/[[:ascii:]]/WBZ
|
407 |
|
408 |
/[[:blank:]]/WBZ
|
409 |
|
410 |
/[[:cntrl:]]/WBZ
|
411 |
|
412 |
/[[:digit:]]/WBZ
|
413 |
|
414 |
/[[:graph:]]/WBZ
|
415 |
|
416 |
/[[:print:]]/WBZ
|
417 |
|
418 |
/[[:punct:]]/WBZ
|
419 |
|
420 |
/[[:space:]]/WBZ
|
421 |
|
422 |
/[[:word:]]/WBZ
|
423 |
|
424 |
/[[:xdigit:]]/WBZ
|
425 |
|
426 |
/-- Unicode properties for \b abd \B --/
|
427 |
|
428 |
/\b...\B/8W
|
429 |
abc_
|
430 |
\x{37e}abc\x{376}
|
431 |
\x{37e}\x{376}\x{371}\x{393}\x{394}
|
432 |
!\x{c0}++\x{c1}\x{c2}
|
433 |
!\x{c0}+++++
|
434 |
|
435 |
/-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/
|
436 |
|
437 |
/\b...\B/8
|
438 |
abc_
|
439 |
** Failers
|
440 |
\x{37e}abc\x{376}
|
441 |
\x{37e}\x{376}\x{371}\x{393}\x{394}
|
442 |
!\x{c0}++\x{c1}\x{c2}
|
443 |
!\x{c0}+++++
|
444 |
|
445 |
/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/
|
446 |
|
447 |
/\b...\B/W
|
448 |
abc_
|
449 |
!\x{c0}++\x{c1}\x{c2}
|
450 |
!\x{c0}+++++
|
451 |
|
452 |
/-- POSIX interface --/
|
453 |
|
454 |
/\w/P
|
455 |
+++\x{c2}
|
456 |
|
457 |
/\w/WP
|
458 |
+++\x{c2}
|
459 |
|
460 |
/-- Some of these are silly, but they check various combinations --/
|
461 |
|
462 |
/[[:^alpha:][:^cntrl:]]+/8WBZ
|
463 |
123
|
464 |
abc
|
465 |
|
466 |
/[[:^cntrl:][:^alpha:]]+/8WBZ
|
467 |
123
|
468 |
abc
|
469 |
|
470 |
/[[:alpha:]]+/8WBZ
|
471 |
abc
|
472 |
|
473 |
/[[:^alpha:]\S]+/8WBZ
|
474 |
123
|
475 |
abc
|
476 |
|
477 |
/[^\d]+/8WBZ
|
478 |
abc123
|
479 |
abc\x{123}
|
480 |
\x{660}abc
|
481 |
|
482 |
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/8iSI
|
483 |
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
|
484 |
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
|
485 |
|
486 |
/\p{Xps}*/SI
|
487 |
|
488 |
/\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ
|
489 |
|
490 |
/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ
|
491 |
|
492 |
/\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ
|
493 |
|
494 |
/\p{Han}+X\p{Greek}+\x{370}/BZ8
|
495 |
|
496 |
/\p{Xan}+!\p{Xan}+A/BZ
|
497 |
|
498 |
/\p{Xsp}+!\p{Xsp}\t/BZ
|
499 |
|
500 |
/\p{Xps}+!\p{Xps}\t/BZ
|
501 |
|
502 |
/\p{Xwd}+!\p{Xwd}_/BZ
|
503 |
|
504 |
/A+\p{N}A+\dB+\p{N}*B+\d*/WBZ
|
505 |
|
506 |
/-- These behaved oddly in Perl, so they are kept in this test --/
|
507 |
|
508 |
/(\x{23a}\x{23a}\x{23a})?\1/8i
|
509 |
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
|
510 |
|
511 |
/(ȺȺȺ)?\1/8i
|
512 |
ȺȺȺⱥⱥ
|
513 |
|
514 |
/(\x{23a}\x{23a}\x{23a})?\1/8i
|
515 |
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
|
516 |
|
517 |
/(ȺȺȺ)?\1/8i
|
518 |
ȺȺȺⱥⱥⱥ
|
519 |
|
520 |
/(\x{23a}\x{23a}\x{23a})\1/8i
|
521 |
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
|
522 |
|
523 |
/(ȺȺȺ)\1/8i
|
524 |
ȺȺȺⱥⱥ
|
525 |
|
526 |
/(\x{23a}\x{23a}\x{23a})\1/8i
|
527 |
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
|
528 |
|
529 |
/(ȺȺȺ)\1/8i
|
530 |
ȺȺȺⱥⱥⱥ
|
531 |
|
532 |
/(\x{2c65}\x{2c65})\1/8i
|
533 |
\x{2c65}\x{2c65}\x{23a}\x{23a}
|
534 |
|
535 |
/(ⱥⱥ)\1/8i
|
536 |
ⱥⱥȺȺ
|
537 |
|
538 |
/(\x{23a}\x{23a}\x{23a})\1Y/8i
|
539 |
X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
|
540 |
|
541 |
/(\x{2c65}\x{2c65})\1Y/8i
|
542 |
X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
|
543 |
|
544 |
/-- --/
|
545 |
|
546 |
/-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/
|
547 |
|
548 |
/^[\p{Batak}]/8
|
549 |
\x{1bc0}
|
550 |
\x{1bff}
|
551 |
** Failers
|
552 |
\x{1bf4}
|
553 |
|
554 |
/^[\p{Brahmi}]/8
|
555 |
\x{11000}
|
556 |
\x{1106f}
|
557 |
** Failers
|
558 |
\x{1104e}
|
559 |
|
560 |
/^[\p{Mandaic}]/8
|
561 |
\x{840}
|
562 |
\x{85e}
|
563 |
** Failers
|
564 |
\x{85c}
|
565 |
\x{85d}
|
566 |
|
567 |
/-- --/
|
568 |
|
569 |
/-- End of testinput13 --/
|