/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 835 by zherczeg, Mon Oct 31 09:31:46 2011 UTC revision 836 by ph10, Wed Dec 28 17:16:11 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                    Main Library written by Philip Hazel                    Main Library written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11    This JIT compiler regression test program was written by Zoltan Herczeg    This JIT compiler regression test program was written by Zoltan Herczeg
12                        Copyright (c) 2010-2011                        Copyright (c) 2010-2012
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 51  POSSIBILITY OF SUCH DAMAGE. Line 51  POSSIBILITY OF SUCH DAMAGE.
51  #define PCRE_BUG 0x80000000  #define PCRE_BUG 0x80000000
52    
53  /*  /*
54   Hungarian utf8 characters   Letter characters:
55   \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')     \xe6\x92\xad = 0x64ad = 25773 (kanji)
56   \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')   Non-letter characters:
57   \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)     \xc2\xa1 = 0xa1 =  (Inverted Exclamation Mark)
58   \xc2\x85 = 0x85 (NExt Line = NEL)     \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59   \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)     \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60   \xe2\x80\xa8 = 0x2028 (Line Separator)     \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61   \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)   Newlines:
62   \xcc\x8d = 781 (Something with Mark property)     \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63       \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64     Othercase pairs:
65       \xc3\xa9 = 0xe9 = 233 (e')
66          \xc3\x89 = 0xc9 = 201 (E')
67       \xc3\xa1 = 0xe1 = 225 (a')
68          \xc3\x81 = 0xc1 = 193 (A')
69       \xc8\xba = 0x23a = 570
70          \xe2\xb1\xa5 = 0x2c65 = 11365
71       \xe1\xbd\xb8 = 0x1f78 = 8056
72          \xe1\xbf\xb8 = 0x1ff8 = 8184
73       \xf0\x90\x90\x80 = 0x10400 = 66560
74          \xf0\x90\x90\xa8 = 0x10428 = 66600
75     Mark property:
76       \xcc\x8d = 0x30d = 781
77     Special:
78       \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79       \xe0\xa0\x80 = 0x800 = 2048 (lowest 2 byte character)
80       \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81       \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82       \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83  */  */
84    
 static void setstack(pcre_extra *extra);  
85  static int regression_tests(void);  static int regression_tests(void);
86    
87  int main(void)  int main(void)
88  {  {
89          int jit = 0;          int jit = 0;
90    #ifdef SUPPORT_PCRE8
91          pcre_config(PCRE_CONFIG_JIT, &jit);          pcre_config(PCRE_CONFIG_JIT, &jit);
92    #else
93            pcre16_config(PCRE_CONFIG_JIT, &jit);
94    #endif
95          if (!jit) {          if (!jit) {
96                  printf("JIT must be enabled to run pcre_jit_test\n");                  printf("JIT must be enabled to run pcre_jit_test\n");
97                  return 1;                  return 1;
# Line 76  int main(void) Line 99  int main(void)
99          return regression_tests();          return regression_tests();
100  }  }
101    
 static pcre_jit_stack* callback(void *arg)  
 {  
         return (pcre_jit_stack *)arg;  
 }  
   
 static void setstack(pcre_extra *extra)  
 {  
         static pcre_jit_stack *stack;  
         if (stack) pcre_jit_stack_free(stack);  
         stack = pcre_jit_stack_alloc(1, 1024 * 1024);  
         pcre_assign_jit_stack(extra, callback, stack);  
 }  
   
102  /* --------------------------------------------------------------------------------------- */  /* --------------------------------------------------------------------------------------- */
103    
104  #define MUA     (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)  #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105  #define MUAP    (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)  #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106  #define CMUA    (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)  #endif
107  #define CMUAP   (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)  
108  #define MA      (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)  #define MUA     (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
109  #define MAP     (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)  #define MUAP    (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
110  #define CMA     (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)  #define CMUA    (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
111    #define CMUAP   (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
112    #define MA      (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
113    #define MAP     (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
114    #define CMA     (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
115    
116    #define OFFSET_MASK     0x00ffff
117    #define F_NO8           0x010000
118    #define F_NO16          0x020000
119    #define F_NOMATCH       0x040000
120    #define F_DIFF          0x080000
121    #define F_FORCECONV     0x100000
122    #define F_PROPERTY      0x200000
123    
124  struct regression_test_case {  struct regression_test_case {
125          int flags;          int flags;
# Line 124  static struct regression_test_case regre Line 146  static struct regression_test_case regre
146          { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },          { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147          { MUA, 0, "[axd]", "sAXd" },          { MUA, 0, "[axd]", "sAXd" },
148          { CMUA, 0, "[axd]", "sAXd" },          { CMUA, 0, "[axd]", "sAXd" },
149          { CMUA, 0, "[^axd]", "DxA" },          { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150          { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },          { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151          { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },          { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152          { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },          { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
# Line 137  static struct regression_test_case regre Line 159  static struct regression_test_case regre
159          { PCRE_CASELESS, 0, "a1", "Aa1" },          { PCRE_CASELESS, 0, "a1", "Aa1" },
160          { MA, 0, "\\Ca", "cda" },          { MA, 0, "\\Ca", "cda" },
161          { CMA, 0, "\\Ca", "CDA" },          { CMA, 0, "\\Ca", "CDA" },
162          { MA, 0, "\\Cx", "cda" },          { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163          { CMA, 0, "\\Cx", "CDA" },          { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164            { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165            { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166            { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167            { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168    
169          /* Assertions. */          /* Assertions. */
170          { MUA, 0, "\\b[^A]", "A_B#" },          { MUA, 0, "\\b[^A]", "A_B#" },
171          { MA, 0, "\\b\\W", "\n*" },          { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172          { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },          { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173          { MAP, 0, "\\B", "_\xa1" },          { MAP, 0, "\\B", "_\xa1" },
174          { MAP, 0, "\\b_\\b[,A]\\B", "_," },          { MAP, 0, "\\b_\\b[,A]\\B", "_," },
# Line 150  static struct regression_test_case regre Line 176  static struct regression_test_case regre
176          { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },          { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177          { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },          { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178          { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },          { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179          { MUA, 0, "\\b.", "\xcd\xbe" },          { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180          { MA, 0, "\\R^", "\n" },          { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181          { MA, 1, "^", "\n" },          { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182            { MA, 1 | F_NOMATCH, "^", "\n" },
183          { 0, 0, "^ab", "ab" },          { 0, 0, "^ab", "ab" },
184          { 0, 0, "^ab", "aab" },          { 0, 0 | F_NOMATCH, "^ab", "aab" },
185          { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },          { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187          { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },          { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190          { 0, 0, "ab$", "ab" },          { 0, 0, "ab$", "ab" },
191          { 0, 0, "ab$", "ab\r\n" },          { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
192          { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },          { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
193          { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },          { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
194          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
195          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
196          { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },          { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
197          { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },          { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
198          { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },          { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
199          { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },          { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
200          { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },          { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
201          { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },          { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
202          { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },          { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
203          { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },          { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
# Line 190  static struct regression_test_case regre Line 217  static struct regression_test_case regre
217          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
218          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
219          { MA, 0, "\\Aa", "aaa" },          { MA, 0, "\\Aa", "aaa" },
220          { MA, 1, "\\Aa", "aaa" },          { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
221          { MA, 1, "\\Ga", "aaa" },          { MA, 1, "\\Ga", "aaa" },
222          { MA, 1, "\\Ga", "aba" },          { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
223          { MA, 0, "a\\z", "aaa" },          { MA, 0, "a\\z", "aaa" },
224          { MA, 0, "a\\z", "aab" },          { MA, 0 | F_NOMATCH, "a\\z", "aab" },
225    
226          /* Brackets. */          /* Brackets. */
227          { MUA, 0, "(ab|bb|cd)", "bacde" },          { MUA, 0, "(ab|bb|cd)", "bacde" },
# Line 267  static struct regression_test_case regre Line 294  static struct regression_test_case regre
294          { MUA, 0, "\\b\\w+\\B", "x,a_cd" },          { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
295          { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },          { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
296          { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },          { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
297            { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
298            { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
299            { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
300            { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301            { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
302    
303          /* Basic character sets. */          /* Basic character sets. */
304          { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },          { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
# Line 278  static struct regression_test_case regre Line 310  static struct regression_test_case regre
310    
311          /* Unicode properties. */          /* Unicode properties. */
312          { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },          { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
313          { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },          { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
314          { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },          { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
315          { MUAP, 0, "[\\P{Any}]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
316          { MUAP, 0, "[^\\p{Any}]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
317          { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
318          { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319          { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
320          { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
321          { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },          { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322          { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },          { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
323          { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },          { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
324          { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },          { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
325          { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },          { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
326          { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },          { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
327          { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },          { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
328          { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },          { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
329          { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },          { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
330          { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },          { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
331          { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB  baaa" },          { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB  baaa" },
332    
333          /* Possible empty brackets. */          /* Possible empty brackets. */
# Line 312  static struct regression_test_case regre Line 344  static struct regression_test_case regre
344    
345          /* Start offset. */          /* Start offset. */
346          { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },          { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
347          { MUA, 4, "(\\w\\W\\w)+", "ab#d" },          { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
348          { MUA, 2, "(\\w\\W\\w)+", "ab#d" },          { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349          { MUA, 1, "(\\w\\W\\w)+", "ab#d" },          { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
350    
351          /* Newline. */          /* Newline. */
# Line 327  static struct regression_test_case regre Line 359  static struct regression_test_case regre
359          { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },          { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
360          { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },          { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
362          { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },          { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
363          { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },          { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
364          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
365          { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },          { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
# Line 335  static struct regression_test_case regre Line 367  static struct regression_test_case regre
367          { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },          { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
368          { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },          { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
369          { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },          { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
370          { MUA, 0, "\\R+", "ab" },          { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
371          { MUA, 0, "\\R+", "ab\r\n\r" },          { MUA, 0, "\\R+", "ab\r\n\r" },
372          { MUA, 0, "\\R*", "ab\r\n\r" },          { MUA, 0, "\\R*", "ab\r\n\r" },
373          { MUA, 0, "\\R*", "\r\n\r" },          { MUA, 0, "\\R*", "\r\n\r" },
# Line 343  static struct regression_test_case regre Line 375  static struct regression_test_case regre
375          { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },          { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
376          { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },          { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
377          { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },          { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
378          { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },          { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
379          { MUA, 0, "\\R+\\R\\R", "\r\r\r" },          { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
380          { MUA, 0, "\\R*\\R\\R", "\n\r" },          { MUA, 0, "\\R*\\R\\R", "\n\r" },
381          { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },          { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
382          { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },          { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
383    
384          /* Atomic groups (no fallback from "next" direction). */          /* Atomic groups (no fallback from "next" direction). */
385          { MUA, 0, "(?>ab)ab", "bab" },          { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
386          { MUA, 0, "(?>(ab))ab", "bab" },          { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
387          { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",          { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
388                          "bababcdedefgheijijklmlmnop" },                          "bababcdedefgheijijklmlmnop" },
389          { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },          { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
# Line 379  static struct regression_test_case regre Line 411  static struct regression_test_case regre
411          { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },          { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
412          { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },          { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
413          { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },          { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414          { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },          { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
415          { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },          { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
416          { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },          { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
417          { MUA, 0, "\\X{2,4}", "abcdef" },          { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
418          { MUA, 0, "\\X{2,4}?", "abcdef" },          { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
419          { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },          { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
420          { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },          { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
421          { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },          { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
422          { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },          { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
423    
# Line 420  static struct regression_test_case regre Line 452  static struct regression_test_case regre
452          { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },          { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
453          { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },          { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
454          { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },          { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
455          { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },          { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
456          { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },          { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
457          { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },          { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
458          { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },          { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
# Line 444  static struct regression_test_case regre Line 476  static struct regression_test_case regre
476          { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },          { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
477          { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },          { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
478          { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },          { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
479          { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },          { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
480          { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },          { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
481          { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },          { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
482          { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },          { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
483          { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },          { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
484            { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
485    
486          /* Assertions. */          /* Assertions. */
487          { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },          { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
# Line 464  static struct regression_test_case regre Line 497  static struct regression_test_case regre
497          { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },          { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
498          { MUA, 0, "((?(?=(a))a)+k)", "bbak" },          { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
499          { MUA, 0, "((?(?=a)a)+k)", "bbak" },          { MUA, 0, "((?(?=a)a)+k)", "bbak" },
500          { MUA, 0, "(?=(?>(a))m)amk", "a k" },          { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
501          { MUA, 0, "(?!(?>(a))m)amk", "a k" },          { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
502          { MUA, 0, "(?>(?=(a))am)amk", "a k" },          { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
503          { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },          { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
504          { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },          { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
505          { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },          { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
# Line 477  static struct regression_test_case regre Line 510  static struct regression_test_case regre
510          { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },          { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
511    
512          /* Not empty, ACCEPT, FAIL */          /* Not empty, ACCEPT, FAIL */
513          { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },          { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
514          { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
515          { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
516          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
517          { MUA, 0, "a(*ACCEPT)b", "ab" },          { MUA, 0, "a(*ACCEPT)b", "ab" },
518          { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },          { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
519          { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
520          { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
521          { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },          { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
522          { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
523          { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
524          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
525          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },          { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
526          { MUA, 0, "((a(*ACCEPT)b))", "ab" },          { MUA, 0, "((a(*ACCEPT)b))", "ab" },
527          { MUA, 0, "(a(*FAIL)a|a)", "aaa" },          { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
528          { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },          { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
# Line 506  static struct regression_test_case regre Line 539  static struct regression_test_case regre
539          { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },          { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
540          { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },          { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
541          { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },          { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542          { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },          { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
543          { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },          { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544          { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },          { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
545          { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },          { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
# Line 520  static struct regression_test_case regre Line 553  static struct regression_test_case regre
553          { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },          { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
554          { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },          { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
555          { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },          { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
556          { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },          { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
557          { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
558          { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
559          { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
560          { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },          { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
561          { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
562          { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
563          { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
# Line 537  static struct regression_test_case regre Line 570  static struct regression_test_case regre
570          { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },          { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
571          { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },          { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
572          { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },          { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
573          { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },          { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
574          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
575    
576          /* First line. */          /* First line. */
577          { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },          { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
578          { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
579          { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },          { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
580          { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
581          { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
582          { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
583          { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
584          { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
585          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
586          { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },          { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
587          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
588          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
589          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
590          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
591          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
592          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
593          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
594    
595          /* Recurse. */          /* Recurse. */
# Line 564  static struct regression_test_case regre Line 597  static struct regression_test_case regre
597          { MUA, 0, "((a))(?1)", "aa" },          { MUA, 0, "((a))(?1)", "aa" },
598          { MUA, 0, "(b|a)(?1)", "aa" },          { MUA, 0, "(b|a)(?1)", "aa" },
599          { MUA, 0, "(b|(a))(?1)", "aa" },          { MUA, 0, "(b|(a))(?1)", "aa" },
600          { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },          { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
601          { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },          { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
602          { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },          { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
603          { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },          { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
# Line 572  static struct regression_test_case regre Line 605  static struct regression_test_case regre
605          { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },          { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
606          { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },          { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
607          { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },          { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
608          { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },          { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
609          { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },          { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
610          { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },          { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
611          { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },          { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
612          { MUA, 0, "b|<(?R)*>", "<<b>" },          { MUA, 0, "b|<(?R)*>", "<<b>" },
613          { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },          { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
614          { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },          { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
# Line 586  static struct regression_test_case regre Line 619  static struct regression_test_case regre
619          { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },          { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
620          { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },          { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
621    
622            /* 16 bit specific tests. */
623            { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
624            { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
625            { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
626            { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
627            { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
628            { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
629            { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
630            { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
631            { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
632            { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
633            { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
634            { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
635            { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
636            { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
637            { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
638            { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
639            { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
640            { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641            { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
642            { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
643            { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
644            { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
645            { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
646            { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
647            { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
648            { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
649            { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
650            { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
651            { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
652            { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
653            { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
654    
655          /* Deep recursion. */          /* Deep recursion. */
656          { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },          { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
657          { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },          { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
658          { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },          { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
659    
660          /* Deep recursion: Stack limit reached. */          /* Deep recursion: Stack limit reached. */
661          { MA, 0, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },          { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
662          { MA, 0, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },          { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
663          { MA, 0, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },          { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
664          { MA, 0, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },          { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
665          { MA, 0, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },          { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
666    
667          { 0, 0, NULL, NULL }          { 0, 0, NULL, NULL }
668  };  };
669    
670    static const unsigned char *tables(int mode)
671    {
672            /* The purpose of this function to allow valgrind
673            for reporting invalid reads and writes. */
674            static unsigned char *tables_copy;
675            pcre *regex;
676            const char *errorptr;
677            int erroroffset;
678            const unsigned char *default_tables;
679    #ifdef SUPPORT_PCRE8
680            char null_str[1] = { 0 };
681    #else
682            PCRE_SCHAR16 null_str[1] = { 0 };
683    #endif
684    
685            if (mode) {
686                    if (tables_copy)
687                            free(tables_copy);
688                    tables_copy = NULL;
689                    return NULL;
690            }
691    
692            if (tables_copy)
693                    return tables_copy;
694    
695            default_tables = NULL;
696    #ifdef SUPPORT_PCRE8
697            regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
698            if (regex) {
699                    pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
700                    pcre_free(regex);
701            }
702    #else
703            regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
704            if (regex) {
705                    pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
706                    pcre16_free(regex);
707            }
708    #endif
709            /* Shouldn't ever happen. */
710            if (!default_tables)
711                    return NULL;
712    
713            /* Unfortunately this value cannot get from pcre_fullinfo.
714            Since this is a test program, this is acceptable at the moment. */
715            tables_copy = (unsigned char *)malloc(1088);
716            if (!tables_copy)
717                    return NULL;
718    
719            memcpy(tables_copy, default_tables, 1088);
720            return tables_copy;
721    }
722    
723    static pcre_jit_stack* callback(void *arg)
724    {
725            return (pcre_jit_stack *)arg;
726    }
727    
728    #ifdef SUPPORT_PCRE8
729    static void setstack8(pcre_extra *extra)
730    {
731            static pcre_jit_stack *stack;
732    
733            if (!extra) {
734                    if (stack)
735                            pcre_jit_stack_free(stack);
736                    stack = NULL;
737                    return;
738            }
739    
740            if (!stack)
741                    stack = pcre_jit_stack_alloc(1, 1024 * 1024);
742            /* Extra can be NULL. */
743            pcre_assign_jit_stack(extra, callback, stack);
744    }
745    #endif /* SUPPORT_PCRE8 */
746    
747    #ifdef SUPPORT_PCRE16
748    static void setstack16(pcre_extra *extra)
749    {
750            static pcre_jit_stack *stack;
751    
752            if (!extra) {
753                    if (stack)
754                            pcre16_jit_stack_free(stack);
755                    stack = NULL;
756                    return;
757            }
758    
759            if (!stack)
760                    stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
761            /* Extra can be NULL. */
762            pcre16_assign_jit_stack(extra, callback, stack);
763    }
764    #endif /* SUPPORT_PCRE8 */
765    
766    #ifdef SUPPORT_PCRE16
767    
768    static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length)
769    {
770            unsigned char *iptr = (unsigned char*)input;
771            unsigned short *optr = (unsigned short *)output;
772            unsigned int c;
773    
774            if (max_length == 0)
775                    return 0;
776    
777            while (*iptr && max_length > 1) {
778                    c = 0;
779                    if (offsetmap)
780                            *offsetmap++ = (int)(iptr - (unsigned char*)input);
781    
782                    if (!(*iptr & 0x80))
783                            c = *iptr++;
784                    else if (!(*iptr & 0x20)) {
785                            c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
786                            iptr += 2;
787                    } else if (!(*iptr & 0x10)) {
788                            c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
789                            iptr += 3;
790                    } else if (!(*iptr & 0x08)) {
791                            c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
792                            iptr += 4;
793                    }
794    
795                    if (c < 65536) {
796                            *optr++ = c;
797                            max_length--;
798                    } else if (max_length <= 2) {
799                            *optr = '\0';
800                            return (int)(optr - (unsigned short *)output);
801                    } else {
802                            c -= 0x10000;
803                            *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
804                            *optr++ = 0xdc00 | (c & 0x3ff);
805                            max_length -= 2;
806                            if (offsetmap)
807                                    offsetmap++;
808                    }
809            }
810            if (offsetmap)
811                    *offsetmap = (int)(iptr - (unsigned char*)input);
812            *optr = '\0';
813            return (int)(optr - (unsigned short *)output);
814    }
815    
816    static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length)
817    {
818            unsigned char *iptr = (unsigned char*)input;
819            unsigned short *optr = (unsigned short *)output;
820    
821            if (max_length == 0)
822                    return 0;
823    
824            while (*iptr && max_length > 1) {
825                    *optr++ = *iptr++;
826                    max_length--;
827            }
828            *optr = '\0';
829            return (int)(optr - (unsigned short *)output);
830    }
831    
832    #define REGTEST_MAX_LENGTH 4096
833    static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
834    static int regtest_offsetmap[REGTEST_MAX_LENGTH];
835    
836    #endif /* SUPPORT_PCRE16 */
837    
838    static int check_ascii(const char *input)
839    {
840            const unsigned char *ptr = (unsigned char *)input;
841            while (*ptr) {
842                    if (*ptr > 127)
843                            return 0;
844                    ptr++;
845            }
846            return 1;
847    }
848    
849  static int regression_tests(void)  static int regression_tests(void)
850  {  {
         pcre *re;  
851          struct regression_test_case *current = regression_test_cases;          struct regression_test_case *current = regression_test_cases;
852          const char *error;          const char *error;
         pcre_extra *extra;  
         int utf8 = 0, ucp = 0;  
         int ovector1[32];  
         int ovector2[32];  
         int return_value1, return_value2;  
853          int i, err_offs;          int i, err_offs;
854          int total = 0, succesful = 0;          int is_successful, is_ascii_pattern, is_ascii_input;
855            int total = 0;
856            int successful = 0;
857          int counter = 0;          int counter = 0;
858          int disabled_flags = PCRE_BUG;  #ifdef SUPPORT_PCRE8
859            pcre *re8;
860            pcre_extra *extra8;
861            int ovector8_1[32];
862            int ovector8_2[32];
863            int return_value8_1, return_value8_2;
864            int utf8 = 0, ucp8 = 0;
865            int disabled_flags8 = 0;
866    #endif
867    #ifdef SUPPORT_PCRE16
868            pcre *re16;
869            pcre_extra *extra16;
870            int ovector16_1[32];
871            int ovector16_2[32];
872            int return_value16_1, return_value16_2;
873            int utf16 = 0, ucp16 = 0;
874            int disabled_flags16 = 0;
875            int length16;
876    #endif
877    
878          /* This test compares the behaviour of interpreter and JIT. Although disabling          /* This test compares the behaviour of interpreter and JIT. Although disabling
879          utf8 or ucp may make tests fail, if the pcre_exec result is the SAME, it is          utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
880          still considered successful from pcre_jit_test point of view. */          still considered successful from pcre_jit_test point of view. */
881    
882            printf("Running JIT regression\n");
883    
884    #ifdef SUPPORT_PCRE8
885          pcre_config(PCRE_CONFIG_UTF8, &utf8);          pcre_config(PCRE_CONFIG_UTF8, &utf8);
886          pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);          pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
887          if (!utf8)          if (!utf8)
888                  disabled_flags |= PCRE_UTF8;                  disabled_flags8 |= PCRE_UTF8;
889          if (!ucp)          if (!ucp8)
890                  disabled_flags |= PCRE_UCP;                  disabled_flags8 |= PCRE_UCP;
891            printf(" in  8 bit mode with utf8  %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
892    #endif
893    #ifdef SUPPORT_PCRE16
894            pcre16_config(PCRE_CONFIG_UTF16, &utf16);
895            pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
896            if (!utf16)
897                    disabled_flags16 |= PCRE_UTF8;
898            if (!ucp16)
899                    disabled_flags16 |= PCRE_UCP;
900            printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
901    #endif
902    
         printf("Running JIT regression tests with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp ? "enabled" : "disabled");  
903          while (current->pattern) {          while (current->pattern) {
904                  /* printf("\nPattern: %s :\n", current->pattern); */                  /* printf("\nPattern: %s :\n", current->pattern); */
905                  total++;                  total++;
906                    if (current->start_offset & F_PROPERTY) {
907                            is_ascii_pattern = 0;
908                            is_ascii_input = 0;
909                    } else {
910                            is_ascii_pattern = check_ascii(current->pattern);
911                            is_ascii_input = check_ascii(current->input);
912                    }
913    
914                  error = NULL;                  error = NULL;
915                  re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags), &error, &err_offs, NULL);  #ifdef SUPPORT_PCRE8
916                    re8 = NULL;
917                  if (!re) {                  if (!(current->start_offset & F_NO8))
918                          if (utf8 && ucp)                          re8 = pcre_compile(current->pattern,
919                                  printf("\nCannot compile pattern: %s\n", current->pattern);                                  current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
920                          else {                                  &error, &err_offs, tables(0));
921                                  /* Some patterns cannot be compiled when either of utf8  
922                                  or ucp is disabled. We just skip them. */                  extra8 = NULL;
923                                  printf(".");                  if (re8) {
924                                  succesful++;                          error = NULL;
925                            extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
926                            if (!extra8) {
927                                    printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
928                                    pcre_free(re8);
929                                    re8 = NULL;
930                          }                          }
931                          current++;                          if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
932                          continue;                                  printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
933                  }                                  pcre_free_study(extra8);
934                                    pcre_free(re8);
935                                    re8 = NULL;
936                            }
937                    } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
938                            printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
939    #endif
940    #ifdef SUPPORT_PCRE16
941                    if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
942                            convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
943                    else
944                            copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
945    
946                    re16 = NULL;
947                    if (!(current->start_offset & F_NO16))
948                            re16 = pcre16_compile(regtest_buf,
949                                    current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
950                                    &error, &err_offs, tables(0));
951    
952                    extra16 = NULL;
953                    if (re16) {
954                            error = NULL;
955                            extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
956                            if (!extra16) {
957                                    printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
958                                    pcre16_free(re16);
959                                    re16 = NULL;
960                            }
961                            if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
962                                    printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
963                                    pcre16_free_study(extra16);
964                                    pcre16_free(re16);
965                                    re16 = NULL;
966                            }
967                    } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
968                            printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
969    #endif
970    
971                  error = NULL;                  counter++;
972                  extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);                  if ((counter & 0x3) != 0) {
973                  if (!extra) {  #ifdef SUPPORT_PCRE8
974                          printf("\nCannot study pattern: %s\n", current->pattern);                          setstack8(NULL);
975                          current++;  #endif
976                          continue;  #ifdef SUPPORT_PCRE16
977                            setstack16(NULL);
978    #endif
979                  }                  }
980    
981                  if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {  #ifdef SUPPORT_PCRE8
982                          printf("\nJIT compiler does not support: %s\n", current->pattern);                  return_value8_1 = -1000;
983                          current++;                  return_value8_2 = -1000;
984                          continue;                  for (i = 0; i < 32; ++i)
985                            ovector8_1[i] = -2;
986                    for (i = 0; i < 32; ++i)
987                            ovector8_2[i] = -2;
988                    if (re8) {
989                            setstack8(extra8);
990                            return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
991                                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
992                            return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
993                                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
994                  }                  }
995    #endif
996    
997                  counter++;  #ifdef SUPPORT_PCRE16
998                  if ((counter & 0x3) != 0)                  return_value16_1 = -1000;
999                          setstack(extra);                  return_value16_2 = -1000;
   
1000                  for (i = 0; i < 32; ++i)                  for (i = 0; i < 32; ++i)
1001                          ovector1[i] = -2;                          ovector16_1[i] = -2;
                 return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);  
   
1002                  for (i = 0; i < 32; ++i)                  for (i = 0; i < 32; ++i)
1003                          ovector2[i] = -2;                          ovector16_2[i] = -2;
1004                  return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);                  if (re16) {
1005                            setstack16(extra16);
1006                            if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1007                                    length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1008                            else
1009                                    length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1010                            return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1011                                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
1012                            return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1013                                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
1014                    }
1015    #endif
1016    
1017                  /* If PCRE_BUG is set, just run the test, but do not compare the results.                  /* If F_DIFF is set, just run the test, but do not compare the results.
1018                  Segfaults can still be captured. */                  Segfaults can still be captured. */
                 if (!(current->flags & PCRE_BUG)) {  
                         if (return_value1 != return_value2) {  
                                 printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);  
                                 current++;  
                                 continue;  
                         }  
1019    
1020                          if (return_value1 >= 0) {                  is_successful = 1;
1021                                  return_value1 *= 2;                  if (!(current->start_offset & F_DIFF)) {
1022                                  err_offs = 0;  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1023                                  for (i = 0; i < return_value1; ++i)                          if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1024                                          if (ovector1[i] != ovector2[i]) {                                  /* All results must be the same. */
1025                                                  printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);                                  if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1026                                                  err_offs = 1;                                          printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1027                                                    return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1028                                                    total, current->pattern, current->input);
1029                                            is_successful = 0;
1030                                    } else if (return_value8_1 >= 0) {
1031                                            return_value8_1 *= 2;
1032                                            /* Transform back the results. */
1033                                            if (current->flags & PCRE_UTF8) {
1034                                                    for (i = 0; i < return_value8_1; ++i) {
1035                                                            if (ovector16_1[i] >= 0)
1036                                                                    ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1037                                                            if (ovector16_2[i] >= 0)
1038                                                                    ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1039                                                    }
1040                                          }                                          }
1041                                  if (err_offs) {  
1042                                          current++;                                          for (i = 0; i < return_value8_1; ++i)
1043                                          continue;                                                  if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1044                                                            printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1045                                                                    i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1046                                                                    total, current->pattern, current->input);
1047                                                            is_successful = 0;
1048                                                    }
1049                                    }
1050                            } else {
1051    #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1052                                    /* Only the 8 bit and 16 bit results must be equal. */
1053    #ifdef SUPPORT_PCRE8
1054                                    if (return_value8_1 != return_value8_2) {
1055                                            printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1056                                                    return_value8_1, return_value8_2, total, current->pattern, current->input);
1057                                            is_successful = 0;
1058                                    } else if (return_value8_1 >= 0) {
1059                                            return_value8_1 *= 2;
1060                                            for (i = 0; i < return_value8_1; ++i)
1061                                                    if (ovector8_1[i] != ovector8_2[i]) {
1062                                                            printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1063                                                                    i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1064                                                            is_successful = 0;
1065                                                    }
1066                                    }
1067    #endif
1068    
1069    #ifdef SUPPORT_PCRE16
1070                                    if (return_value16_1 != return_value16_2) {
1071                                            printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1072                                                    return_value16_1, return_value16_2, total, current->pattern, current->input);
1073                                            is_successful = 0;
1074                                    } else if (return_value16_1 >= 0) {
1075                                            return_value16_1 *= 2;
1076                                            for (i = 0; i < return_value16_1; ++i)
1077                                                    if (ovector16_1[i] != ovector16_2[i]) {
1078                                                            printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1079                                                                    i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1080                                                            is_successful = 0;
1081                                                    }
1082                                    }
1083    #endif
1084    
1085    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1086                            }
1087    #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1088                    }
1089    
1090                    if (is_successful) {
1091    #ifdef SUPPORT_PCRE8
1092                            if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1093                                    if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1094                                            printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1095                                                    total, current->pattern, current->input);
1096                                            is_successful = 0;
1097                                    }
1098    
1099                                    if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1100                                            printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1101                                                    total, current->pattern, current->input);
1102                                            is_successful = 0;
1103                                    }
1104                            }
1105    #endif
1106    #ifdef SUPPORT_PCRE16
1107                            if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1108                                    if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1109                                            printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1110                                                    total, current->pattern, current->input);
1111                                            is_successful = 0;
1112                                    }
1113    
1114                                    if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1115                                            printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1116                                                    total, current->pattern, current->input);
1117                                            is_successful = 0;
1118                                  }                                  }
1119                          }                          }
1120    #endif
1121                  }                  }
1122    
1123                  pcre_free_study(extra);                  if (is_successful)
1124                  pcre_free(re);                          successful++;
1125    
1126                  /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */  #ifdef SUPPORT_PCRE8
1127                    if (re8) {
1128                            pcre_free_study(extra8);
1129                            pcre_free(re8);
1130                    }
1131    #endif
1132    #ifdef SUPPORT_PCRE16
1133                    if (re16) {
1134                            pcre16_free_study(extra16);
1135                            pcre16_free(re16);
1136                    }
1137    #endif
1138    
1139                    /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1140                  printf(".");                  printf(".");
1141                  fflush(stdout);                  fflush(stdout);
1142                  current++;                  current++;
                 succesful++;  
1143          }          }
1144            tables(1);
1145    #ifdef SUPPORT_PCRE8
1146            setstack8(NULL);
1147    #endif
1148    #ifdef SUPPORT_PCRE16
1149            setstack16(NULL);
1150    #endif
1151    
1152          if (total == succesful) {          if (total == successful) {
1153                  printf("\nAll JIT regression tests are successfully passed.\n");                  printf("\nAll JIT regression tests are successfully passed.\n");
1154                  return 0;                  return 0;
1155          } else {          } else {
1156                  printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);                  printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1157                  return 1;                  return 1;
1158          }          }
1159  }  }

Legend:
Removed from v.835  
changed lines
  Added in v.836

  ViewVC Help
Powered by ViewVC 1.1.5