/[pcre]/code/trunk/testdata/testinput2
ViewVC logotype

Diff of /code/trunk/testdata/testinput2

Parent Directory | Revision Log | View Patch

revision 448 by ph10, Tue Sep 15 19:11:48 2009 UTC revision 608 by ph10, Sun Jun 12 16:25:55 2011 UTC
# Line 1  Line 1 
1    /-- This set of tests is not Perl-compatible. It checks on special features
2        of PCRE's API, error diagnostics, and the compiled code of some patterns.
3        It also checks the non-Perl syntax that PCRE supports (Python, .NET,
4        Oniguruma). Finally, there are some tests where PCRE and Perl differ,
5        either because PCRE can't be compatible, or there is a possible Perl
6        bug. --/
7    
8    /-- Originally, the Perl >= 5.10 things were in here too, but now I have
9        separated many (most?) of them out into test 11. However, there may still
10        be some that were overlooked. --/
11    
12  /(a)b|/I  /(a)b|/I
13    
14  /abc/I  /abc/I
# Line 40  Line 51 
51    
52  /(?X)[\B]/  /(?X)[\B]/
53    
54    /(?X)[\R]/
55    
56    /(?X)[\X]/
57    
58    /[\B]/BZ
59    
60    /[\R]/BZ
61    
62    /[\X]/BZ
63    
64  /[z-a]/  /[z-a]/
65    
66  /^*/  /^*/
# Line 333  Line 354 
354      *** Failers      *** Failers
355      a      a
356    
357  /This one is here because I think Perl 5.005_02 gets the setting of $1 wrong/I  /This one is here because Perl behaves differently; see also the following/I
358    
359  /^(a\1?){4}$/I  /^(a\1?){4}$/I
360        aaaa
361      aaaaaa      aaaaaa
362    
363    /Perl does not fail these two for the final subjects. Neither did PCRE until/
364    /release 8.01. The problem is in backtracking into a subpattern that contains/
365    /a recursive reference to itself. PCRE has now made these into atomic patterns./
366    
367    /^(xa|=?\1a){2}$/
368        xa=xaa
369        ** Failers
370        xa=xaaa
371    
372    /^(xa|=?\1a)+$/
373        xa=xaa
374        ** Failers
375        xa=xaaa
376    
377  /These are syntax tests from Perl 5.005/I  /These are syntax tests from Perl 5.005/I
378    
# Line 1122  Line 1158 
1158    
1159  /(a(?1)+b)/DZ  /(a(?1)+b)/DZ
1160    
 /^\W*(?:((.)\W*(?1)\W*\2|)|((.)\W*(?3)\W*\4|\W*.\W*))\W*$/Ii  
     1221  
     Satan, oscillate my metallic sonatas!  
     A man, a plan, a canal: Panama!  
     Able was I ere I saw Elba.  
     *** Failers  
     The quick brown fox  
   
1161  /^(\d+|\((?1)([+*-])(?1)\)|-(?1))$/I  /^(\d+|\((?1)([+*-])(?1)\)|-(?1))$/I
1162      12      12
1163      (((2+2)*-3)-7)      (((2+2)*-3)-7)
# Line 2261  a random value. /Ix Line 2289  a random value. /Ix
2289  /a+b?(*THEN)c+(*FAIL)/C  /a+b?(*THEN)c+(*FAIL)/C
2290      aaabccc      aaabccc
2291    
 /a(*PRUNE:XXX)b/  
   
2292  /a(*MARK)b/  /a(*MARK)b/
2293    
2294  /(?i:A{1,}\6666666666)/  /(?i:A{1,}\6666666666)/
# Line 2320  a random value. /Ix Line 2346  a random value. /Ix
2346      a\nb      a\nb
2347      a\r\nb      a\r\nb
2348      a\x85b      a\x85b
2349    
2350    /(*ANY).*/g
2351        abc\r\ndef
2352    
2353    /(*ANYCRLF).*/g
2354        abc\r\ndef
2355    
2356    /(*CRLF).*/g
2357        abc\r\ndef
2358    
2359  /a\Rb/I<bsr_anycrlf>  /a\Rb/I<bsr_anycrlf>
2360      a\rb      a\rb
# Line 2549  a random value. /Ix Line 2584  a random value. /Ix
2584    abc\Y    abc\Y
2585    abcxypqr    abcxypqr
2586    abcxypqr\Y    abcxypqr\Y
2587    
2588    /(*NO_START_OPT)xyz/C
2589      abcxyz
2590    
2591    /xyz/CY
2592      abcxyz
2593    
2594  /^"((?(?=[a])[^"])|b)*"$/C  /^"((?(?=[a])[^"])|b)*"$/C
2595      "ab"      "ab"
# Line 2750  a random value. /Ix Line 2791  a random value. /Ix
2791    
2792  /(?&word)(?&element)(?(DEFINE)(?<element><[^\d][^>]>[^<])(?<word>\w*+))/BZ  /(?&word)(?&element)(?(DEFINE)(?<element><[^\d][^>]>[^<])(?<word>\w*+))/BZ
2793    
2794    /(ab)(x(y)z(cd(*ACCEPT)))pq/BZ
2795    
2796  /abc\K/+  /abc\K/+
2797      abcdef      abcdef
2798      abcdef\N\N      abcdef\N\N
# Line 2840  a random value. /Ix Line 2883  a random value. /Ix
2883  /^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)/  /^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)/
2884      XYabcdY      XYabcdY
2885    
2886  / End of testinput2 /  /(?<=b(?1)|zzz)(a)/
2887        xbaax
2888        xzzzax
2889    
2890    /(a)(?<=b\1)/
2891    
2892    /(a)(?<=b+(?1))/
2893    
2894    /(a+)(?<=b(?1))/
2895    
2896    /(a(?<=b(?1)))/
2897    
2898    /(?<=b(?1))xyz/
2899    
2900    /(?<=b(?1))xyz(b+)pqrstuvew/
2901    
2902    /(a|bc)\1/SI
2903    
2904    /(a|bc)\1{2,3}/SI
2905    
2906    /(a|bc)(?1)/SI
2907    
2908    /(a|b\1)(a|b\1)/SI
2909    
2910    /(a|b\1){2}/SI
2911    
2912    /(a|bbbb\1)(a|bbbb\1)/SI
2913    
2914    /(a|bbbb\1){2}/SI
2915    
2916    /^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/SI
2917    
2918    /  (?: [\040\t] |  \(
2919    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2920    \)  )*                          # optional leading comment
2921    (?:    (?:
2922    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2923    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2924    |
2925    " (?:                      # opening quote...
2926    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2927    |                     #    or
2928    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2929    )* "  # closing quote
2930    )                    # initial word
2931    (?:  (?: [\040\t] |  \(
2932    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2933    \)  )*  \.  (?: [\040\t] |  \(
2934    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2935    \)  )*   (?:
2936    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2937    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2938    |
2939    " (?:                      # opening quote...
2940    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2941    |                     #    or
2942    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2943    )* "  # closing quote
2944    )  )* # further okay, if led by a period
2945    (?: [\040\t] |  \(
2946    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2947    \)  )*  @  (?: [\040\t] |  \(
2948    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2949    \)  )*    (?:
2950    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2951    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2952    |   \[                         # [
2953    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
2954    \]                        #           ]
2955    )                           # initial subdomain
2956    (?:                                  #
2957    (?: [\040\t] |  \(
2958    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2959    \)  )*  \.                        # if led by a period...
2960    (?: [\040\t] |  \(
2961    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2962    \)  )*   (?:
2963    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2964    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2965    |   \[                         # [
2966    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
2967    \]                        #           ]
2968    )                     #   ...further okay
2969    )*
2970    # address
2971    |                     #  or
2972    (?:
2973    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2974    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2975    |
2976    " (?:                      # opening quote...
2977    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2978    |                     #    or
2979    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2980    )* "  # closing quote
2981    )             # one word, optionally followed by....
2982    (?:
2983    [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]  |  # atom and space parts, or...
2984    \(
2985    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2986    \)       |  # comments, or...
2987    
2988    " (?:                      # opening quote...
2989    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2990    |                     #    or
2991    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2992    )* "  # closing quote
2993    # quoted strings
2994    )*
2995    <  (?: [\040\t] |  \(
2996    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2997    \)  )*                     # leading <
2998    (?:  @  (?: [\040\t] |  \(
2999    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3000    \)  )*    (?:
3001    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3002    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3003    |   \[                         # [
3004    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3005    \]                        #           ]
3006    )                           # initial subdomain
3007    (?:                                  #
3008    (?: [\040\t] |  \(
3009    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3010    \)  )*  \.                        # if led by a period...
3011    (?: [\040\t] |  \(
3012    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3013    \)  )*   (?:
3014    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3015    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3016    |   \[                         # [
3017    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3018    \]                        #           ]
3019    )                     #   ...further okay
3020    )*
3021    
3022    (?:  (?: [\040\t] |  \(
3023    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3024    \)  )*  ,  (?: [\040\t] |  \(
3025    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3026    \)  )*  @  (?: [\040\t] |  \(
3027    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3028    \)  )*    (?:
3029    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3030    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3031    |   \[                         # [
3032    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3033    \]                        #           ]
3034    )                           # initial subdomain
3035    (?:                                  #
3036    (?: [\040\t] |  \(
3037    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3038    \)  )*  \.                        # if led by a period...
3039    (?: [\040\t] |  \(
3040    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3041    \)  )*   (?:
3042    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3043    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3044    |   \[                         # [
3045    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3046    \]                        #           ]
3047    )                     #   ...further okay
3048    )*
3049    )* # further okay, if led by comma
3050    :                                # closing colon
3051    (?: [\040\t] |  \(
3052    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3053    \)  )*  )? #       optional route
3054    (?:
3055    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3056    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3057    |
3058    " (?:                      # opening quote...
3059    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
3060    |                     #    or
3061    \\ [^\x80-\xff]           #   Escaped something (something != CR)
3062    )* "  # closing quote
3063    )                    # initial word
3064    (?:  (?: [\040\t] |  \(
3065    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3066    \)  )*  \.  (?: [\040\t] |  \(
3067    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3068    \)  )*   (?:
3069    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3070    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3071    |
3072    " (?:                      # opening quote...
3073    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
3074    |                     #    or
3075    \\ [^\x80-\xff]           #   Escaped something (something != CR)
3076    )* "  # closing quote
3077    )  )* # further okay, if led by a period
3078    (?: [\040\t] |  \(
3079    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3080    \)  )*  @  (?: [\040\t] |  \(
3081    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3082    \)  )*    (?:
3083    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3084    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3085    |   \[                         # [
3086    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3087    \]                        #           ]
3088    )                           # initial subdomain
3089    (?:                                  #
3090    (?: [\040\t] |  \(
3091    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3092    \)  )*  \.                        # if led by a period...
3093    (?: [\040\t] |  \(
3094    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3095    \)  )*   (?:
3096    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3097    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3098    |   \[                         # [
3099    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3100    \]                        #           ]
3101    )                     #   ...further okay
3102    )*
3103    #       address spec
3104    (?: [\040\t] |  \(
3105    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3106    \)  )*  > #                  trailing >
3107    # name and address
3108    )  (?: [\040\t] |  \(
3109    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3110    \)  )*                       # optional trailing comment
3111    /xSI
3112    
3113    /<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/isIS
3114    
3115    "(?>.*/)foo"SI
3116    
3117    /(?(?=[^a-z]+[a-z])  \d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} ) /xSI
3118    
3119    /(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/iSI
3120    
3121    /(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/SI
3122    
3123    /<a[\s]+href[\s]*=[\s]*          # find <a href=
3124     ([\"\'])?                       # find single or double quote
3125     (?(1) (.*?)\1 | ([^\s]+))       # if quote found, match up to next matching
3126                                     # quote, otherwise match up to next space
3127    /isxSI
3128    
3129    /^(?!:)                       # colon disallowed at start
3130      (?:                         # start of item
3131        (?: [0-9a-f]{1,4} |       # 1-4 hex digits or
3132        (?(1)0 | () ) )           # if null previously matched, fail; else null
3133        :                         # followed by colon
3134      ){1,7}                      # end item; 1-7 of them required
3135      [0-9a-f]{1,4} $             # final hex number at end of string
3136      (?(1)|.)                    # check that there was an empty component
3137      /xiIS
3138    
3139    /(?|(?<a>A)|(?<a>B))/I
3140        AB\Ca
3141        BA\Ca
3142    
3143    /(?|(?<a>A)|(?<b>B))/
3144    
3145    /(?:a(?<quote> (?<apostrophe>')|(?<realquote>")) |
3146        b(?<quote> (?<apostrophe>')|(?<realquote>")) )
3147        (?('quote')[a-z]+|[0-9]+)/JIx
3148        a"aaaaa
3149        b"aaaaa
3150        ** Failers
3151        b"11111
3152        a"11111
3153    
3154    /^(?|(a)(b)(c)(?<D>d)|(?<D>e)) (?('D')X|Y)/JDZx
3155        abcdX
3156        eX
3157        ** Failers
3158        abcdY
3159        ey
3160    
3161    /(?<A>a) (b)(c)  (?<A>d  (?(R&A)$ | (?4)) )/JDZx
3162        abcdd
3163        ** Failers
3164        abcdde
3165    
3166    /abcd*/
3167        xxxxabcd\P
3168        xxxxabcd\P\P
3169    
3170    /abcd*/i
3171        xxxxabcd\P
3172        xxxxabcd\P\P
3173        XXXXABCD\P
3174        XXXXABCD\P\P
3175    
3176    /abc\d*/
3177        xxxxabc1\P
3178        xxxxabc1\P\P
3179    
3180    /(a)bc\1*/
3181        xxxxabca\P
3182        xxxxabca\P\P
3183    
3184    /abc[de]*/
3185        xxxxabcde\P
3186        xxxxabcde\P\P
3187    
3188    /-- This is not in the Perl >= 5.10 test because Perl seems currently to be
3189        broken and not behaving as specified in that it *does* bumpalong after
3190        hitting (*COMMIT). --/
3191    
3192    /(?1)(A(*COMMIT)|B)D/
3193        ABD
3194        XABD
3195        BAD
3196        ABXABD
3197        ** Failers
3198        ABX
3199        BAXBAD
3200    
3201    /(\3)(\1)(a)/<JS>
3202        cat
3203    
3204    /(\3)(\1)(a)/SI<JS>
3205        cat
3206    
3207    /(\3)(\1)(a)/SI
3208        cat
3209    
3210    /i(?(DEFINE)(?<s>a))/SI
3211        i
3212    
3213    /()i(?(1)a)/SI
3214        ia
3215    
3216    /(?i)a(?-i)b|c/BZ
3217        XabX
3218        XAbX
3219        CcC
3220        ** Failers
3221        XABX
3222    
3223    /(?i)a(?s)b|c/BZ
3224    
3225    /(?i)a(?s-i)b|c/BZ
3226    
3227    /^(ab(c\1)d|x){2}$/BZ
3228        xabcxd
3229    
3230    /^(?&t)*+(?(DEFINE)(?<t>.))$/BZ
3231    
3232    /^(?&t)*(?(DEFINE)(?<t>.))$/BZ
3233    
3234    / -- The first four of these are not in the Perl >= 5.10 test because Perl
3235         documents that the use of \K in assertions is "not well defined". The
3236         last is here because Perl gives the match as "b" rather than "ab". I
3237         believe this to be a Perl bug. --/
3238    
3239    /(?=a\Kb)ab/
3240        ab
3241    
3242    /(?!a\Kb)ac/
3243        ac
3244    
3245    /^abc(?<=b\Kc)d/
3246        abcd
3247    
3248    /^abc(?<!b\Kq)d/
3249        abcd
3250    
3251    /(?>a\Kb)z|(ab)/
3252        ab
3253    
3254    /----------------------/
3255    
3256    /(?P<L1>(?P<L2>0|)|(?P>L2)(?P>L1))/
3257    
3258    /abc(*MARK:)pqr/
3259    
3260    /abc(*:)pqr/
3261    
3262    /abc(*FAIL:123)xyz/
3263    
3264    /--- This should, and does, fail. In Perl, it does not, which I think is a
3265         bug because replacing the B in the pattern by (B|D) does make it fail. ---/
3266    
3267    /A(*COMMIT)B/+K
3268        ACABX
3269    
3270    /--- These should be different, but in Perl 5.11 are not, which I think
3271         is a bug in Perl. ---/
3272    
3273    /A(*THEN)B|A(*THEN)C/K
3274        AC
3275    
3276    /A(*PRUNE)B|A(*PRUNE)C/K
3277        AC
3278    
3279    /--- A whole lot of tests of verbs with arguments are here rather than in test
3280         11 because Perl doesn't seem to follow its specification entirely
3281         correctly. ---/
3282    
3283    /--- Perl 5.11 sets $REGERROR on the AC failure case here; PCRE does not. It is
3284         not clear how Perl defines "involved in the failure of the match". ---/
3285    
3286    /^(A(*THEN:A)B|C(*THEN:B)D)/K
3287        AB
3288        CD
3289        ** Failers
3290        AC
3291        CB
3292    
3293    /--- Check the use of names for success and failure. PCRE doesn't show these
3294    names for success, though Perl does, contrary to its spec. ---/
3295    
3296    /^(A(*PRUNE:A)B|C(*PRUNE:B)D)/K
3297        AB
3298        CD
3299        ** Failers
3300        AC
3301        CB
3302    
3303    /--- An empty name does not pass back an empty string. It is the same as if no
3304    name were given. ---/
3305    
3306    /^(A(*PRUNE:)B|C(*PRUNE:B)D)/K
3307        AB
3308        CD
3309    
3310    /--- PRUNE goes to next bumpalong; COMMIT does not. ---/
3311    
3312    /A(*PRUNE:A)B/K
3313        ACAB
3314    
3315    /(*MARK:A)(*PRUNE:B)(C|X)/K
3316        C
3317        D
3318    
3319    /(*MARK:A)(*THEN:B)(C|X)/K
3320        C
3321        D
3322    
3323    /--- This should fail, as the skip causes a bump to offset 3 (the skip) ---/
3324    
3325    /A(*MARK:A)A+(*SKIP)(B|Z) | AC/xK
3326        AAAC
3327    
3328    /--- Same --/
3329    
3330    /A(*MARK:A)A+(*MARK:B)(*SKIP:B)(B|Z) | AC/xK
3331        AAAC
3332    
3333    /--- This should fail; the SKIP advances by one, but when we get to AC, the
3334         PRUNE kills it. ---/
3335    
3336    /A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/xK
3337        AAAC
3338    
3339    /A(*:A)A+(*SKIP)(B|Z) | AC/xK
3340        AAAC
3341    
3342    /--- This should fail, as a null name is the same as no name ---/
3343    
3344    /A(*MARK:A)A+(*SKIP:)(B|Z) | AC/xK
3345        AAAC
3346    
3347    /--- This fails in PCRE, and I think that is in accordance with Perl's
3348         documentation, though in Perl it succeeds. ---/
3349    
3350    /A(*MARK:A)A+(*SKIP:B)(B|Z) | AAC/xK
3351        AAAC
3352    
3353    /--- Mark names can be duplicated ---/
3354    
3355    /A(*:A)B|X(*:A)Y/K
3356        AABC
3357        XXYZ
3358    
3359    /^A(*:A)B|^X(*:A)Y/K
3360        ** Failers
3361        XAQQ
3362    
3363    /--- A check on what happens after hitting a mark and then bumping along to
3364    something that does not even start. Perl reports tags after the failures here,
3365    though it does not when the individual letters are made into something
3366    more complicated. ---/
3367    
3368    /A(*:A)B|XX(*:B)Y/K
3369        AABC
3370        XXYZ
3371        ** Failers
3372        XAQQ
3373        XAQQXZZ
3374        AXQQQ
3375        AXXQQQ
3376    
3377    /--- COMMIT at the start of a pattern should be the same as an anchor. Perl
3378    optimizations defeat this. So does the PCRE optimization unless we disable it
3379    with \Y. ---/
3380    
3381    /(*COMMIT)ABC/
3382        ABCDEFG
3383        ** Failers
3384        DEFGABC\Y
3385    
3386    /--- Repeat some tests with added studying. ---/
3387    
3388    /A(*COMMIT)B/+KS
3389        ACABX
3390    
3391    /A(*THEN)B|A(*THEN)C/KS
3392        AC
3393    
3394    /A(*PRUNE)B|A(*PRUNE)C/KS
3395        AC
3396    
3397    /^(A(*THEN:A)B|C(*THEN:B)D)/KS
3398        AB
3399        CD
3400        ** Failers
3401        AC
3402        CB
3403    
3404    /^(A(*PRUNE:A)B|C(*PRUNE:B)D)/KS
3405        AB
3406        CD
3407        ** Failers
3408        AC
3409        CB
3410    
3411    /^(A(*PRUNE:)B|C(*PRUNE:B)D)/KS
3412        AB
3413        CD
3414    
3415    /A(*PRUNE:A)B/KS
3416        ACAB
3417    
3418    /(*MARK:A)(*PRUNE:B)(C|X)/KS
3419        C
3420        D
3421    
3422    /(*MARK:A)(*THEN:B)(C|X)/KS
3423        C
3424        D
3425    
3426    /A(*MARK:A)A+(*SKIP)(B|Z) | AC/xKS
3427        AAAC
3428    
3429    /A(*MARK:A)A+(*MARK:B)(*SKIP:B)(B|Z) | AC/xKS
3430        AAAC
3431    
3432    /A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/xKS
3433        AAAC
3434    
3435    /A(*:A)A+(*SKIP)(B|Z) | AC/xKS
3436        AAAC
3437    
3438    /A(*MARK:A)A+(*SKIP:)(B|Z) | AC/xKS
3439        AAAC
3440    
3441    /A(*MARK:A)A+(*SKIP:B)(B|Z) | AAC/xKS
3442        AAAC
3443    
3444    /A(*:A)B|XX(*:B)Y/KS
3445        AABC
3446        XXYZ
3447        ** Failers
3448        XAQQ
3449        XAQQXZZ
3450        AXQQQ
3451        AXXQQQ
3452    
3453    /(*COMMIT)ABC/
3454        ABCDEFG
3455        ** Failers
3456        DEFGABC\Y
3457    
3458    /^(ab (c+(*THEN)cd) | xyz)/x
3459        abcccd
3460    
3461    /^(ab (c+(*PRUNE)cd) | xyz)/x
3462        abcccd
3463    
3464    /^(ab (c+(*FAIL)cd) | xyz)/x
3465        abcccd
3466    
3467    /--- Perl 5.11 gets some of these wrong ---/
3468    
3469    /(?>.(*ACCEPT))*?5/
3470        abcde
3471    
3472    /(.(*ACCEPT))*?5/
3473        abcde
3474    
3475    /(.(*ACCEPT))5/
3476        abcde
3477    
3478    /(.(*ACCEPT))*5/
3479        abcde
3480    
3481    /A\NB./BZ
3482        ACBD
3483        *** Failers
3484        A\nB
3485        ACB\n
3486    
3487    /A\NB./sBZ
3488        ACBD
3489        ACB\n
3490        *** Failers
3491        A\nB
3492    
3493    /A\NB/<crlf>
3494        A\nB
3495        A\rB
3496        ** Failers
3497        A\r\nB
3498    
3499    /\R+b/BZ
3500    
3501    /\R+\n/BZ
3502    
3503    /\R+\d/BZ
3504    
3505    /\d*\R/BZ
3506    
3507    /\s*\R/BZ
3508    
3509    /-- Perl treats this one differently, not failing the second string. I believe
3510        that is a bug in Perl. --/
3511    
3512    /^((abc|abcx)(*THEN)y|abcd)/
3513        abcd
3514        *** Failers
3515        abcxy
3516    
3517    /(?<=abc)def/
3518        abc\P\P
3519    
3520    /abc$/
3521        abc
3522        abc\P
3523        abc\P\P
3524    
3525    /abc$/m
3526        abc
3527        abc\n
3528        abc\P\P
3529        abc\n\P\P
3530        abc\P
3531        abc\n\P
3532    
3533    /abc\z/
3534        abc
3535        abc\P
3536        abc\P\P
3537    
3538    /abc\Z/
3539        abc
3540        abc\P
3541        abc\P\P
3542    
3543    /abc\b/
3544        abc
3545        abc\P
3546        abc\P\P
3547    
3548    /abc\B/
3549        abc
3550        abc\P
3551        abc\P\P
3552    
3553    /.+/
3554        abc\>0
3555        abc\>1
3556        abc\>2
3557        abc\>3
3558        abc\>4
3559        abc\>-4
3560    
3561    /^\cģ/
3562    
3563    /(?P<abn>(?P=abn)xxx)/BZ
3564    
3565    /(a\1z)/BZ
3566    
3567    /(?P<abn>(?P=abn)(?<badstufxxx)/BZ
3568    
3569    /(?P<abn>(?P=axn)xxx)/BZ
3570    
3571    /(?P<abn>(?P=axn)xxx)(?<axn>yy)/BZ
3572    
3573    /-- These tests are here because Perl gets the first one wrong. --/
3574    
3575    /(\R*)(.)/s
3576        \r\n
3577        \r\r\n\n\r
3578        \r\r\n\n\r\n
3579    
3580    /(\R)*(.)/s
3581        \r\n
3582        \r\r\n\n\r
3583        \r\r\n\n\r\n
3584    
3585    /((?>\r\n|\n|\x0b|\f|\r|\x85)*)(.)/s
3586        \r\n
3587        \r\r\n\n\r
3588        \r\r\n\n\r\n
3589    
3590    /-- --/
3591    
3592    /^abc$/BZ
3593    
3594    /^abc$/BZm
3595    
3596    /^(a)*+(\w)/S
3597        aaaaX
3598        ** Failers
3599        aaaa
3600    
3601    /^(?:a)*+(\w)/S
3602        aaaaX
3603        ** Failers
3604        aaaa
3605    
3606    /(a)++1234/SDZ
3607    
3608    /([abc])++1234/SI
3609    
3610    /(?<=(abc)+)X/
3611    
3612    /(^ab)/I
3613    
3614    /(^ab)++/I
3615    
3616    /(^ab|^)+/I
3617    
3618    /(^ab|^)++/I
3619    
3620    /(?:^ab)/I
3621    
3622    /(?:^ab)++/I
3623    
3624    /(?:^ab|^)+/I
3625    
3626    /(?:^ab|^)++/I
3627    
3628    /(.*ab)/I
3629    
3630    /(.*ab)++/I
3631    
3632    /(.*ab|.*)+/I
3633    
3634    /(.*ab|.*)++/I
3635    
3636    /(?:.*ab)/I
3637    
3638    /(?:.*ab)++/I
3639    
3640    /(?:.*ab|.*)+/I
3641    
3642    /(?:.*ab|.*)++/I
3643    
3644    /(?=a)[bcd]/I
3645    
3646    /((?=a))[bcd]/I
3647    
3648    /((?=a))+[bcd]/I
3649    
3650    /((?=a))++[bcd]/I
3651    
3652    /(?=a+)[bcd]/iI
3653    
3654    /(?=a+?)[bcd]/iI
3655    
3656    /(?=a++)[bcd]/iI
3657    
3658    /(?=a{3})[bcd]/iI
3659    
3660    /(abc)\1+/S
3661    
3662    /-- Perl doesn't get these right IMO (the 3rd is PCRE-specific) --/
3663    
3664    /(?1)(?:(b(*ACCEPT))){0}/
3665        b
3666    
3667    /(?1)(?:(b(*ACCEPT))){0}c/
3668        bc
3669        ** Failers
3670        b
3671    
3672    /(?1)(?:((*ACCEPT))){0}c/
3673        c
3674        c\N
3675    
3676    /-- --/
3677    
3678    /-- End of testinput2 --/

Legend:
Removed from v.448  
changed lines
  Added in v.608

  ViewVC Help
Powered by ViewVC 1.1.5