/[pcre]/code/branches/pcre16/testdata/testoutput16
ViewVC logotype

Contents of /code/branches/pcre16/testdata/testoutput16

Parent Directory | Revision Log


Revision 810 - (show annotations)
Mon Dec 19 13:34:10 2011 UTC (8 years, 6 months ago) by ph10
File size: 18327 byte(s)
Error occurred while calculating annotation data.
A lot more work on pcretest; now runs many (but not all) tests.
1 /-- This set of tests is for UTF-16 support, and is relevant only to the 16-bit
2 library. There are some non-UTF 16-bit tests as well (it doesn't seem
3 worth setting up another test file just for this case). --/
4
5 /xxx/8?DZSS
6 **Failed: invalid UTF-8 string cannot be converted to UTF-16
7
8 /abc/8
9 ]
10 **Failed: invalid UTF-8 string cannot be converted to UTF-16
11
12 /X(\C{3})/8
13 X\x{11234}Y
14 0: X\x{11234}Y
15 1: \x{11234}Y
16
17 /X(\C{4})/8
18 X\x{11234}YZ
19 0: X\x{11234}YZ
20 1: \x{11234}YZ
21
22 /X\C*/8
23 XYZabcdce
24 0: XYZabcdce
25
26 /X\C*?/8
27 XYZabcde
28 0: X
29
30 /X\C{3,5}/8
31 Xabcdefg
32 0: Xabcde
33 X\x{11234}Y
34 0: X\x{11234}Y
35 X\x{11234}YZ
36 0: X\x{11234}YZ
37 X\x{11234}\x{512}
38 0: X\x{11234}\x{512}
39 X\x{11234}\x{512}YZ
40 0: X\x{11234}\x{512}YZ
41 X\x{11234}\x{512}\x{11234}Z
42 0: X\x{11234}\x{512}\x{11234}
43
44 /X\C{3,5}?/8
45 Xabcdefg
46 0: Xabc
47 X\x{11234}Y
48 0: X\x{11234}Y
49 X\x{11234}YZ
50 0: X\x{11234}Y
51 X\x{11234}\x{512}YZ
52 0: X\x{11234}\x{512}
53 *** Failers
54 No match
55 X\x{11234}
56 No match
57
58 /a\Cb/
59 aXb
60 0: aXb
61 a\nb
62 0: a\x0ab
63
64 /a\Cb/8
65 aXb
66 0: aXb
67 a\nb
68 0: a\x{0a}b
69
70 /a\C\Cb/8
71 a\x{12257}b
72 0: a\x{12257}b
73 ** Failers
74 No match
75 a\x{100}b
76 No match
77
78 /ab\Cde/8
79 abXde
80 0: abXde
81
82 /-- Check maximum non-UTF character size --/
83
84 /\x{ffff}/
85
86 /\x{10000}/
87 Failed: character value in \x{...} sequence is too large at offset 8
88
89 /\x{100}/8DZ
90 ------------------------------------------------------------------
91 Bra
92 \x{100}
93 Ket
94 End
95 ------------------------------------------------------------------
96 Capturing subpattern count = 0
97 Options: utf
98 First char = \x{100}
99 No need char
100
101 /\x{1000}/8DZ
102 ------------------------------------------------------------------
103 Bra
104 \x{1000}
105 Ket
106 End
107 ------------------------------------------------------------------
108 Capturing subpattern count = 0
109 Options: utf
110 First char = \x{1000}
111 No need char
112
113 /\x{10000}/8DZ
114 ------------------------------------------------------------------
115 Bra
116 \x{10000}
117 Ket
118 End
119 ------------------------------------------------------------------
120 Capturing subpattern count = 0
121 Options: utf
122 First char = \x{d800}
123 Need char = 56320
124
125 /\x{100000}/8DZ
126 ------------------------------------------------------------------
127 Bra
128 \x{100000}
129 Ket
130 End
131 ------------------------------------------------------------------
132 Capturing subpattern count = 0
133 Options: utf
134 First char = \x{dbc0}
135 Need char = 56320
136
137 /\x{10ffff}/8DZ
138 ------------------------------------------------------------------
139 Bra
140 \x{10ffff}
141 Ket
142 End
143 ------------------------------------------------------------------
144 Capturing subpattern count = 0
145 Options: utf
146 First char = \x{dbff}
147 Need char = 57343
148
149 /[\x{ff}]/8DZ
150 ------------------------------------------------------------------
151 Bra
152 \xff
153 Ket
154 End
155 ------------------------------------------------------------------
156 Capturing subpattern count = 0
157 Options: utf
158 First char = \x{ff}
159 No need char
160
161 /[\x{100}]/8DZ
162 ------------------------------------------------------------------
163 Bra
164 \x{100}
165 Ket
166 End
167 ------------------------------------------------------------------
168 Capturing subpattern count = 0
169 Options: utf
170 First char = \x{100}
171 No need char
172
173 /\x80/8DZ
174 ------------------------------------------------------------------
175 Bra
176 \x80
177 Ket
178 End
179 ------------------------------------------------------------------
180 Capturing subpattern count = 0
181 Options: utf
182 First char = \x{80}
183 No need char
184
185 /\xff/8DZ
186 ------------------------------------------------------------------
187 Bra
188 \xff
189 Ket
190 End
191 ------------------------------------------------------------------
192 Capturing subpattern count = 0
193 Options: utf
194 First char = \x{ff}
195 No need char
196
197 /\x{D55c}\x{ad6d}\x{C5B4}/DZ8
198 ------------------------------------------------------------------
199 Bra
200 \x{d55c}\x{ad6d}\x{c5b4}
201 Ket
202 End
203 ------------------------------------------------------------------
204 Capturing subpattern count = 0
205 Options: utf
206 First char = \x{d55c}
207 Need char = 50612
208 \x{D55c}\x{ad6d}\x{C5B4}
209 0: \x{d55c}\x{ad6d}\x{c5b4}
210
211 /\x{65e5}\x{672c}\x{8a9e}/DZ8
212 ------------------------------------------------------------------
213 Bra
214 \x{65e5}\x{672c}\x{8a9e}
215 Ket
216 End
217 ------------------------------------------------------------------
218 Capturing subpattern count = 0
219 Options: utf
220 First char = \x{65e5}
221 Need char = 35486
222 \x{65e5}\x{672c}\x{8a9e}
223 0: \x{65e5}\x{672c}\x{8a9e}
224
225 /\x{80}/DZ8
226 ------------------------------------------------------------------
227 Bra
228 \x80
229 Ket
230 End
231 ------------------------------------------------------------------
232 Capturing subpattern count = 0
233 Options: utf
234 First char = \x{80}
235 No need char
236
237 /\x{084}/DZ8
238 ------------------------------------------------------------------
239 Bra
240 \x84
241 Ket
242 End
243 ------------------------------------------------------------------
244 Capturing subpattern count = 0
245 Options: utf
246 First char = \x{84}
247 No need char
248
249 /\x{104}/DZ8
250 ------------------------------------------------------------------
251 Bra
252 \x{104}
253 Ket
254 End
255 ------------------------------------------------------------------
256 Capturing subpattern count = 0
257 Options: utf
258 First char = \x{104}
259 No need char
260
261 /\x{861}/DZ8
262 ------------------------------------------------------------------
263 Bra
264 \x{861}
265 Ket
266 End
267 ------------------------------------------------------------------
268 Capturing subpattern count = 0
269 Options: utf
270 First char = \x{861}
271 No need char
272
273 /\x{212ab}/DZ8
274 ------------------------------------------------------------------
275 Bra
276 \x{212ab}
277 Ket
278 End
279 ------------------------------------------------------------------
280 Capturing subpattern count = 0
281 Options: utf
282 First char = \x{d844}
283 Need char = 57003
284
285 /-- This one is here not because it's different to Perl, but because the way
286 the captured single-byte is displayed. (In Perl it becomes a character, and you
287 can't tell the difference.) --/
288
289 /X(\C)(.*)/8
290 X\x{1234}
291 0: X\x{1234}
292 1: \x{1234}
293 2:
294 X\nabc
295 0: X\x{0a}abc
296 1: \x{0a}
297 2: abc
298
299 /-- This one is here because Perl gives out a grumbly error message (quite
300 correctly, but that messes up comparisons). --/
301
302 /a\Cb/8
303 *** Failers
304 No match
305 a\x{100}b
306 0: a\x{100}b
307
308 /[^ab\xC0-\xF0]/8SDZ
309 ------------------------------------------------------------------
310 Bra
311 [\x00-`c-\xbf\xf1-\xff] (neg)
312 Ket
313 End
314 ------------------------------------------------------------------
315 Capturing subpattern count = 0
316 Options: utf
317 No first char
318 No need char
319 Subject length lower bound = 1
320 Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
321 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
322 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
323 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
324 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
325 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
326 \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
327 \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
328 \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
329 \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
330 \xfc \xfd \xfe \xff
331 \x{f1}
332 0: \x{f1}
333 \x{bf}
334 0: \x{bf}
335 \x{100}
336 0: \x{100}
337 \x{1000}
338 0: \x{1000}
339 *** Failers
340 0: *
341 \x{c0}
342 No match
343 \x{f0}
344 No match
345
346 /Ā{3,4}/8SDZ
347 ------------------------------------------------------------------
348 Bra
349 \x{100}{3}
350 \x{100}?
351 Ket
352 End
353 ------------------------------------------------------------------
354 Capturing subpattern count = 0
355 Options: utf
356 First char = \x{100}
357 Need char = 256
358 Subject length lower bound = 3
359 No set of starting bytes
360 \x{100}\x{100}\x{100}\x{100\x{100}
361 0: \x{100}\x{100}\x{100}
362
363 /(\x{100}+|x)/8SDZ
364 ------------------------------------------------------------------
365 Bra
366 CBra 1
367 \x{100}+
368 Alt
369 x
370 Ket
371 Ket
372 End
373 ------------------------------------------------------------------
374 Capturing subpattern count = 1
375 Options: utf
376 No first char
377 No need char
378 Subject length lower bound = 1
379 Starting byte set: x \xff
380
381 /(\x{100}*a|x)/8SDZ
382 ------------------------------------------------------------------
383 Bra
384 CBra 1
385 \x{100}*+
386 a
387 Alt
388 x
389 Ket
390 Ket
391 End
392 ------------------------------------------------------------------
393 Capturing subpattern count = 1
394 Options: utf
395 No first char
396 No need char
397 Subject length lower bound = 1
398 Starting byte set: a x \xff
399
400 /(\x{100}{0,2}a|x)/8SDZ
401 ------------------------------------------------------------------
402 Bra
403 CBra 1
404 \x{100}{0,2}
405 a
406 Alt
407 x
408 Ket
409 Ket
410 End
411 ------------------------------------------------------------------
412 Capturing subpattern count = 1
413 Options: utf
414 No first char
415 No need char
416 Subject length lower bound = 1
417 Starting byte set: a x \xff
418
419 /(\x{100}{1,2}a|x)/8SDZ
420 ------------------------------------------------------------------
421 Bra
422 CBra 1
423 \x{100}
424 \x{100}{0,1}
425 a
426 Alt
427 x
428 Ket
429 Ket
430 End
431 ------------------------------------------------------------------
432 Capturing subpattern count = 1
433 Options: utf
434 No first char
435 No need char
436 Subject length lower bound = 1
437 Starting byte set: x \xff
438
439 /\x{100}/8DZ
440 ------------------------------------------------------------------
441 Bra
442 \x{100}
443 Ket
444 End
445 ------------------------------------------------------------------
446 Capturing subpattern count = 0
447 Options: utf
448 First char = \x{100}
449 No need char
450
451 /a\x{100}\x{101}*/8DZ
452 ------------------------------------------------------------------
453 Bra
454 a\x{100}
455 \x{101}*
456 Ket
457 End
458 ------------------------------------------------------------------
459 Capturing subpattern count = 0
460 Options: utf
461 First char = 'a'
462 Need char = 256
463
464 /a\x{100}\x{101}+/8DZ
465 ------------------------------------------------------------------
466 Bra
467 a\x{100}
468 \x{101}+
469 Ket
470 End
471 ------------------------------------------------------------------
472 Capturing subpattern count = 0
473 Options: utf
474 First char = 'a'
475 Need char = 257
476
477 /[^\x{c4}]/DZ
478 ------------------------------------------------------------------
479 Bra
480 [^\xc4]
481 Ket
482 End
483 ------------------------------------------------------------------
484 Capturing subpattern count = 0
485 No options
486 No first char
487 No need char
488
489 /[\x{100}]/8DZ
490 ------------------------------------------------------------------
491 Bra
492 \x{100}
493 Ket
494 End
495 ------------------------------------------------------------------
496 Capturing subpattern count = 0
497 Options: utf
498 First char = \x{100}
499 No need char
500 \x{100}
501 0: \x{100}
502 Z\x{100}
503 0: \x{100}
504 \x{100}Z
505 0: \x{100}
506 *** Failers
507 No match
508
509 /[\xff]/DZ8
510 ------------------------------------------------------------------
511 Bra
512 \xff
513 Ket
514 End
515 ------------------------------------------------------------------
516 Capturing subpattern count = 0
517 Options: utf
518 First char = \x{ff}
519 No need char
520 >\x{ff}<
521 0: \x{ff}
522
523 /[^\xff]/8DZ
524 ------------------------------------------------------------------
525 Bra
526 [^\x{ff}]
527 Ket
528 End
529 ------------------------------------------------------------------
530 Capturing subpattern count = 0
531 Options: utf
532 No first char
533 No need char
534
535 /\x{100}abc(xyz(?1))/8DZ
536 ------------------------------------------------------------------
537 Bra
538 \x{100}abc
539 CBra 1
540 xyz
541 Recurse
542 Ket
543 Ket
544 End
545 ------------------------------------------------------------------
546 Capturing subpattern count = 1
547 Options: utf
548 First char = \x{100}
549 Need char = 'z'
550
551 /\777/8I
552 Capturing subpattern count = 0
553 Options: utf
554 First char = \x{1ff}
555 No need char
556 \x{1ff}
557 0: \x{1ff}
558 \777
559 0: \x{1ff}
560
561 /\x{100}+\x{200}/8DZ
562 ------------------------------------------------------------------
563 Bra
564 \x{100}++
565 \x{200}
566 Ket
567 End
568 ------------------------------------------------------------------
569 Capturing subpattern count = 0
570 Options: utf
571 First char = \x{100}
572 Need char = 512
573
574 /\x{100}+X/8DZ
575 ------------------------------------------------------------------
576 Bra
577 \x{100}++
578 X
579 Ket
580 End
581 ------------------------------------------------------------------
582 Capturing subpattern count = 0
583 Options: utf
584 First char = \x{100}
585 Need char = 'X'
586
587 /^[\QĀ\E-\QŐ\E/BZ8
588 Failed: missing terminating ] for character class at offset 13
589
590 /-- This tests the stricter UTF-8 check according to RFC 3629. --/
591
592 /X/8
593 \x{0}\x{d7ff}\x{e000}\x{10ffff}
594 No match
595 \x{d800}
596 Error -10 (bad UTF-8 string) offset=0 reason=1
597 \x{d800}\?
598 No match
599 \x{da00}
600 Error -10 (bad UTF-8 string) offset=0 reason=1
601 \x{da00}\?
602 No match
603 \x{dfff}
604 Error -10 (bad UTF-8 string) offset=0 reason=3
605 \x{dfff}\?
606 No match
607 \x{110000}
608 Error -10 (bad UTF-8 string) offset=0 reason=3
609 \x{110000}\?
610 No match
611 \x{2000000}
612 Error -10 (bad UTF-8 string) offset=1 reason=3
613 \x{2000000}\?
614 No match
615 \x{7fffffff}
616 Error -10 (bad UTF-8 string) offset=1 reason=3
617 \x{7fffffff}\?
618 No match
619
620 /(*UTF16)\x{11234}/
621 abcd\x{11234}pqr
622 0: \x{11234}
623
624 /(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
625 Capturing subpattern count = 0
626 Options: bsr_unicode utf
627 Forced newline sequence: CRLF
628 First char = 'a'
629 Need char = 'b'
630
631 /\h/SI8
632 Capturing subpattern count = 0
633 Options: utf
634 No first char
635 No need char
636 Subject length lower bound = 1
637 Starting byte set: \x09 \x20 \xa0 \xff
638 ABC\x{09}
639 0: \x{09}
640 ABC\x{20}
641 0:
642 ABC\x{a0}
643 0: \x{a0}
644 ABC\x{1680}
645 0: \x{1680}
646 ABC\x{180e}
647 0: \x{180e}
648 ABC\x{2000}
649 0: \x{2000}
650 ABC\x{202f}
651 0: \x{202f}
652 ABC\x{205f}
653 0: \x{205f}
654 ABC\x{3000}
655 0: \x{3000}
656
657 /\v/SI8
658 Capturing subpattern count = 0
659 Options: utf
660 No first char
661 No need char
662 Subject length lower bound = 1
663 Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff
664 ABC\x{0a}
665 0: \x{0a}
666 ABC\x{0b}
667 0: \x{0b}
668 ABC\x{0c}
669 0: \x{0c}
670 ABC\x{0d}
671 0: \x{0d}
672 ABC\x{85}
673 0: \x{85}
674 ABC\x{2028}
675 0: \x{2028}
676
677 /\h*A/SI8
678 Capturing subpattern count = 0
679 Options: utf
680 No first char
681 Need char = 'A'
682 Subject length lower bound = 1
683 Starting byte set: \x09 \x20 A \xa0
684 CDBABC
685 0: A
686
687 /\v+A/SI8
688 Capturing subpattern count = 0
689 Options: utf
690 No first char
691 Need char = 'A'
692 Subject length lower bound = 2
693 Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff
694
695 /\s?xxx\s/8SI
696 Capturing subpattern count = 0
697 Options: utf
698 No first char
699 Need char = 'x'
700 Subject length lower bound = 4
701 Starting byte set: \x09 \x0a \x0c \x0d \x20 x
702
703 /\sxxx\s/I8ST1
704 Capturing subpattern count = 0
705 Options: utf
706 No first char
707 Need char = 'x'
708 Subject length lower bound = 5
709 Starting byte set: \x09 \x0a \x0c \x0d \x20 \x85 \xa0
710 AB\x{85}xxx\x{a0}XYZ
711 0: \x{85}xxx\x{a0}
712 AB\x{a0}xxx\x{85}XYZ
713 0: \x{a0}xxx\x{85}
714
715 /\S \S/I8ST1
716 Capturing subpattern count = 0
717 Options: utf
718 No first char
719 Need char = ' '
720 Subject length lower bound = 3
721 Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e
722 \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
723 \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @
724 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e
725 f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83
726 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
727 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3
728 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2
729 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1
730 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
731 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
732 \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
733 \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
734 \xfe \xff
735 \x{a2} \x{84}
736 0: \x{a2} \x{84}
737 A Z
738 0: A Z
739
740 /a+/8
741 a\x{123}aa\>1
742 0: aa
743 a\x{123}aa\>2
744 0: aa
745 a\x{123}aa\>3
746 0: a
747 a\x{123}aa\>4
748 No match
749 a\x{123}aa\>5
750 Error -24 (bad offset value)
751 a\x{123}aa\>6
752 Error -24 (bad offset value)
753
754 /\x{1234}+/iS8I
755 Capturing subpattern count = 0
756 Options: caseless utf
757 First char = \x{1234}
758 No need char
759 Subject length lower bound = 1
760 No set of starting bytes
761
762 /\x{1234}+?/iS8I
763 Capturing subpattern count = 0
764 Options: caseless utf
765 First char = \x{1234}
766 No need char
767 Subject length lower bound = 1
768 No set of starting bytes
769
770 /\x{1234}++/iS8I
771 Capturing subpattern count = 0
772 Options: caseless utf
773 First char = \x{1234}
774 No need char
775 Subject length lower bound = 1
776 No set of starting bytes
777
778 /\x{1234}{2}/iS8I
779 Capturing subpattern count = 0
780 Options: caseless utf
781 First char = \x{1234}
782 Need char = 4660
783 Subject length lower bound = 2
784 No set of starting bytes
785
786 /[^\x{c4}]/8DZ
787 ------------------------------------------------------------------
788 Bra
789 [^\x{c4}]
790 Ket
791 End
792 ------------------------------------------------------------------
793 Capturing subpattern count = 0
794 Options: utf
795 No first char
796 No need char
797
798 /X+\x{200}/8DZ
799 ------------------------------------------------------------------
800 Bra
801 X++
802 \x{200}
803 Ket
804 End
805 ------------------------------------------------------------------
806 Capturing subpattern count = 0
807 Options: utf
808 First char = 'X'
809 Need char = 512
810
811 /\R/SI8
812 Capturing subpattern count = 0
813 Options: utf
814 No first char
815 No need char
816 Subject length lower bound = 1
817 Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff
818
819 /-- End of testinput16 --/

  ViewVC Help
Powered by ViewVC 1.1.5