/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_test.c

Parent Directory | Revision Log | View Patch

revision 736 by zherczeg, Sun Oct 16 15:48:03 2011 UTC revision 1055 by chpe, Tue Oct 16 15:53:30 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                    Main Library written by Philip Hazel                    Main Library written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11    This JIT compiler regression test program was written by Zoltan Herczeg    This JIT compiler regression test program was written by Zoltan Herczeg
12                        Copyright (c) 2010-2011                        Copyright (c) 2010-2012
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <string.h>  #include <string.h>
49  #include "pcre.h"  #include "pcre.h"
50    
51    
52    #include "pcre_internal.h"
53    
54  #define PCRE_BUG 0x80000000  #define PCRE_BUG 0x80000000
55    
56  /*  /*
57   Hungarian utf8 characters   Letter characters:
58   \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')     \xe6\x92\xad = 0x64ad = 25773 (kanji)
59   \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')   Non-letter characters:
60   \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)     \xc2\xa1 = 0xa1 =  (Inverted Exclamation Mark)
61   \xc2\x85 = 0x85 (NExt Line = NEL)     \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62   \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)     \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63   \xe2\x80\xa8 = 0x2028 (Line Separator)     \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64   \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)   Newlines:
65   \xcc\x8d = 781 (Something with Mark property)     \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
66       \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67     Othercase pairs:
68       \xc3\xa9 = 0xe9 = 233 (e')
69          \xc3\x89 = 0xc9 = 201 (E')
70       \xc3\xa1 = 0xe1 = 225 (a')
71          \xc3\x81 = 0xc1 = 193 (A')
72       \xc8\xba = 0x23a = 570
73          \xe2\xb1\xa5 = 0x2c65 = 11365
74       \xe1\xbd\xb8 = 0x1f78 = 8056
75          \xe1\xbf\xb8 = 0x1ff8 = 8184
76       \xf0\x90\x90\x80 = 0x10400 = 66560
77          \xf0\x90\x90\xa8 = 0x10428 = 66600
78     Mark property:
79       \xcc\x8d = 0x30d = 781
80     Special:
81       \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82       \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83       \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84       \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85       \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86  */  */
87    
 static void setstack(pcre_extra *extra);  
88  static int regression_tests(void);  static int regression_tests(void);
89    
90  int main(void)  int main(void)
91  {  {
92          int jit = 0;          int jit = 0;
93    #if defined SUPPORT_PCRE8
94          pcre_config(PCRE_CONFIG_JIT, &jit);          pcre_config(PCRE_CONFIG_JIT, &jit);
95    #elif defined SUPPORT_PCRE16
96            pcre16_config(PCRE_CONFIG_JIT, &jit);
97    #elif defined SUPPORT_PCRE32
98            pcre32_config(PCRE_CONFIG_JIT, &jit);
99    #endif
100          if (!jit) {          if (!jit) {
101                  printf("JIT must be enabled to run pcre_jit_test\n");                  printf("JIT must be enabled to run pcre_jit_test\n");
102                  return 1;                  return 1;
# Line 76  int main(void) Line 104  int main(void)
104          return regression_tests();          return regression_tests();
105  }  }
106    
 static pcre_jit_stack* callback(void *arg)  
 {  
         return (pcre_jit_stack *)arg;  
 }  
   
 static void setstack(pcre_extra *extra)  
 {  
         static pcre_jit_stack *stack;  
         if (stack) pcre_jit_stack_free(stack);  
         stack = pcre_jit_stack_alloc(1, 1024 * 1024);  
         pcre_assign_jit_stack(extra, callback, stack);  
 }  
   
107  /* --------------------------------------------------------------------------------------- */  /* --------------------------------------------------------------------------------------- */
108    
109  #define MUA     (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)  #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
110  #define MUAP    (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)  #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111  #define CMUA    (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)  #endif
112  #define CMUAP   (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)  
113  #define MA      (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)  #define MUA     (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
114  #define MAP     (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)  #define MUAP    (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
115  #define CMA     (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)  #define CMUA    (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
116    #define CMUAP   (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
117    #define MA      (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
118    #define MAP     (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
119    #define CMA     (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
120    
121    #define OFFSET_MASK     0x00ffff
122    #define F_NO8           0x010000
123    #define F_NO16          0x020000
124    #define F_NO32          0x020000
125    #define F_NOMATCH       0x040000
126    #define F_DIFF          0x080000
127    #define F_FORCECONV     0x100000
128    #define F_PROPERTY      0x200000
129    
130  struct regression_test_case {  struct regression_test_case {
131          int flags;          int flags;
# Line 124  static struct regression_test_case regre Line 152  static struct regression_test_case regre
152          { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },          { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
153          { MUA, 0, "[axd]", "sAXd" },          { MUA, 0, "[axd]", "sAXd" },
154          { CMUA, 0, "[axd]", "sAXd" },          { CMUA, 0, "[axd]", "sAXd" },
155          { CMUA, 0, "[^axd]", "DxA" },          { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
156          { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },          { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
157          { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },          { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
158          { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },          { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
# Line 137  static struct regression_test_case regre Line 165  static struct regression_test_case regre
165          { PCRE_CASELESS, 0, "a1", "Aa1" },          { PCRE_CASELESS, 0, "a1", "Aa1" },
166          { MA, 0, "\\Ca", "cda" },          { MA, 0, "\\Ca", "cda" },
167          { CMA, 0, "\\Ca", "CDA" },          { CMA, 0, "\\Ca", "CDA" },
168          { MA, 0, "\\Cx", "cda" },          { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
169          { CMA, 0, "\\Cx", "CDA" },          { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
170            { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
171            { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172            { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173            { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
174    
175          /* Assertions. */          /* Assertions. */
176          { MUA, 0, "\\b[^A]", "A_B#" },          { MUA, 0, "\\b[^A]", "A_B#" },
177          { MA, 0, "\\b\\W", "\n*" },          { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
178          { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },          { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
179          { MAP, 0, "\\B", "_\xa1" },          { MAP, 0, "\\B", "_\xa1" },
180          { MAP, 0, "\\b_\\b[,A]\\B", "_," },          { MAP, 0, "\\b_\\b[,A]\\B", "_," },
# Line 150  static struct regression_test_case regre Line 182  static struct regression_test_case regre
182          { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },          { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
183          { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },          { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
184          { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },          { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
185          { MUA, 0, "\\b.", "\xcd\xbe" },          { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
186          { MA, 0, "\\R^", "\n" },          { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
187          { MA, 1, "^", "\n" },          { MA, 0 | F_NOMATCH, "\\R^", "\n" },
188            { MA, 1 | F_NOMATCH, "^", "\n" },
189          { 0, 0, "^ab", "ab" },          { 0, 0, "^ab", "ab" },
190          { 0, 0, "^ab", "aab" },          { 0, 0 | F_NOMATCH, "^ab", "aab" },
191          { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },          { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
192          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
193          { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },          { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
194          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
195          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
196          { 0, 0, "ab$", "ab" },          { 0, 0, "ab$", "ab" },
197          { 0, 0, "ab$", "ab\r\n" },          { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
198            { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
199          { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },          { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
200          { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },          { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
201          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
202          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
203          { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },          { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
204          { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },          { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
205          { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },          { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
206          { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },          { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
207          { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },          { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
208          { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },          { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
209          { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },          { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
210          { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },          { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
# Line 190  static struct regression_test_case regre Line 224  static struct regression_test_case regre
224          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
225          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
226          { MA, 0, "\\Aa", "aaa" },          { MA, 0, "\\Aa", "aaa" },
227          { MA, 1, "\\Aa", "aaa" },          { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
228          { MA, 1, "\\Ga", "aaa" },          { MA, 1, "\\Ga", "aaa" },
229          { MA, 1, "\\Ga", "aba" },          { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
230          { MA, 0, "a\\z", "aaa" },          { MA, 0, "a\\z", "aaa" },
231          { MA, 0, "a\\z", "aab" },          { MA, 0 | F_NOMATCH, "a\\z", "aab" },
232    
233          /* Brackets. */          /* Brackets. */
234          { MUA, 0, "(ab|bb|cd)", "bacde" },          { MUA, 0, "(ab|bb|cd)", "bacde" },
# Line 267  static struct regression_test_case regre Line 301  static struct regression_test_case regre
301          { MUA, 0, "\\b\\w+\\B", "x,a_cd" },          { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
302          { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },          { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
303          { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },          { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
304            { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
305            { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
306            { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
307            { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308            { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
309    
310          /* Basic character sets. */          /* Basic character sets. */
311          { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },          { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
# Line 278  static struct regression_test_case regre Line 317  static struct regression_test_case regre
317    
318          /* Unicode properties. */          /* Unicode properties. */
319          { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },          { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
320          { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },          { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
321          { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },          { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
322          { MUAP, 0, "[\\P{Any}]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
323          { MUAP, 0, "[^\\p{Any}]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
324          { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
325          { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
326          { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
327          { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },          { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
328          { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },          { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
329          { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },          { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
330          { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },          { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
331          { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },          { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
332          { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },          { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
333          { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },          { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
334          { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },          { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
335          { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },          { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
336          { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },          { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
337          { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },          { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
338          { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB  baaa" },          { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB  baaa" },
339    
340          /* Possible empty brackets. */          /* Possible empty brackets. */
# Line 312  static struct regression_test_case regre Line 351  static struct regression_test_case regre
351    
352          /* Start offset. */          /* Start offset. */
353          { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },          { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
354          { MUA, 4, "(\\w\\W\\w)+", "ab#d" },          { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
355          { MUA, 2, "(\\w\\W\\w)+", "ab#d" },          { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
356          { MUA, 1, "(\\w\\W\\w)+", "ab#d" },          { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
357    
358          /* Newline. */          /* Newline. */
# Line 327  static struct regression_test_case regre Line 366  static struct regression_test_case regre
366          { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },          { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
367          { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },          { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
368          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
369          { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },          { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
370          { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },          { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
371          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },          { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
372          { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },          { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
# Line 335  static struct regression_test_case regre Line 374  static struct regression_test_case regre
374          { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },          { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
375          { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },          { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
376          { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },          { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
377          { MUA, 0, "\\R+", "ab" },          { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
378          { MUA, 0, "\\R+", "ab\r\n\r" },          { MUA, 0, "\\R+", "ab\r\n\r" },
379          { MUA, 0, "\\R*", "ab\r\n\r" },          { MUA, 0, "\\R*", "ab\r\n\r" },
380          { MUA, 0, "\\R*", "\r\n\r" },          { MUA, 0, "\\R*", "\r\n\r" },
# Line 343  static struct regression_test_case regre Line 382  static struct regression_test_case regre
382          { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },          { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
383          { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },          { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
384          { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },          { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
385          { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },          { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
386          { MUA, 0, "\\R+\\R\\R", "\r\r\r" },          { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
387          { MUA, 0, "\\R*\\R\\R", "\n\r" },          { MUA, 0, "\\R*\\R\\R", "\n\r" },
388          { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },          { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
389          { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },          { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
390    
391          /* Atomic groups (no fallback from "next" direction). */          /* Atomic groups (no fallback from "next" direction). */
392          { MUA, 0, "(?>ab)ab", "bab" },          { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
393          { MUA, 0, "(?>(ab))ab", "bab" },          { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
394          { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",          { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
395                          "bababcdedefgheijijklmlmnop" },                          "bababcdedefgheijijklmlmnop" },
396          { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },          { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
# Line 379  static struct regression_test_case regre Line 418  static struct regression_test_case regre
418          { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },          { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
419          { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },          { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
420          { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },          { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
421          { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },          { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
422          { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },          { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
423          { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },          { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
424          { MUA, 0, "\\X{2,4}", "abcdef" },          { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
425          { MUA, 0, "\\X{2,4}?", "abcdef" },          { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
426          { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },          { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
427          { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },          { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
428          { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },          { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
429          { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },          { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
430    
# Line 420  static struct regression_test_case regre Line 459  static struct regression_test_case regre
459          { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },          { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
460          { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },          { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
461          { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },          { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
462          { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },          { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
463          { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },          { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
464          { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },          { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
465          { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },          { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
# Line 444  static struct regression_test_case regre Line 483  static struct regression_test_case regre
483          { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },          { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
484          { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },          { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
485          { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },          { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
486          { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },          { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
487          { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },          { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
488          { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },          { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
489          { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },          { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
490          { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },          { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
491            { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
492    
493          /* Assertions. */          /* Assertions. */
494          { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },          { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
# Line 464  static struct regression_test_case regre Line 504  static struct regression_test_case regre
504          { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },          { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
505          { MUA, 0, "((?(?=(a))a)+k)", "bbak" },          { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
506          { MUA, 0, "((?(?=a)a)+k)", "bbak" },          { MUA, 0, "((?(?=a)a)+k)", "bbak" },
507          { MUA, 0, "(?=(?>(a))m)amk", "a k" },          { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
508          { MUA, 0, "(?!(?>(a))m)amk", "a k" },          { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
509          { MUA, 0, "(?>(?=(a))am)amk", "a k" },          { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
510          { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },          { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
511          { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },          { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
512          { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },          { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
# Line 477  static struct regression_test_case regre Line 517  static struct regression_test_case regre
517          { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },          { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
518    
519          /* Not empty, ACCEPT, FAIL */          /* Not empty, ACCEPT, FAIL */
520          { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },          { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
521          { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
522          { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
523          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
524          { MUA, 0, "a(*ACCEPT)b", "ab" },          { MUA, 0, "a(*ACCEPT)b", "ab" },
525          { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },          { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
526          { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
527          { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
528          { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },          { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
529          { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
530          { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },          { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
531          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
532          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },          { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
533          { MUA, 0, "((a(*ACCEPT)b))", "ab" },          { MUA, 0, "((a(*ACCEPT)b))", "ab" },
534          { MUA, 0, "(a(*FAIL)a|a)", "aaa" },          { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
535          { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },          { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
# Line 506  static struct regression_test_case regre Line 546  static struct regression_test_case regre
546          { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },          { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
547          { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },          { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
548          { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },          { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
549          { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },          { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
550          { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },          { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
551          { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },          { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
552          { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },          { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
# Line 520  static struct regression_test_case regre Line 560  static struct regression_test_case regre
560          { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },          { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
561          { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },          { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
562          { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },          { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
563          { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },          { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
564          { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
565          { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
566          { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
567          { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },          { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
568          { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
569          { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
570          { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },          { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
571          { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },          { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
572            { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
573            { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
574            { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
575    
576          /* Set start of match. */          /* Set start of match. */
577          { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },          { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
578          { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },          { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
579          { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },          { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
580          { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },          { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
581          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },          { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
582    
583          /* First line. */          /* First line. */
584          { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },          { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
585          { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
586          { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },          { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
587          { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
588          { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
589          { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
590          { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
591          { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },          { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
592          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },          { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
593          { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
594          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },          { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
595          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
596            { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
597          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
598          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
599          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
600          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
601          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },          { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
602            { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
603    
604          /* Recurse. */          /* Recurse. */
605          { MUA, 0, "(a)(?1)", "aa" },          { MUA, 0, "(a)(?1)", "aa" },
606          { MUA, 0, "((a))(?1)", "aa" },          { MUA, 0, "((a))(?1)", "aa" },
607          { MUA, 0, "(b|a)(?1)", "aa" },          { MUA, 0, "(b|a)(?1)", "aa" },
608          { MUA, 0, "(b|(a))(?1)", "aa" },          { MUA, 0, "(b|(a))(?1)", "aa" },
609          { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },          { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
610          { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },          { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
611          { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },          { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
612          { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },          { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
# Line 569  static struct regression_test_case regre Line 614  static struct regression_test_case regre
614          { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },          { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
615          { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },          { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
616          { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },          { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
617          { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },          { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
618          { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },          { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
619          { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },          { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
620          { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },          { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
621          { MUA, 0, "b|<(?R)*>", "<<b>" },          { MUA, 0, "b|<(?R)*>", "<<b>" },
622          { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },          { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
623          { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },          { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
624            { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
625            { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
626            { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
627            { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
628            { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
629            { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
630    
631            /* 16 bit specific tests. */
632            { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
633            { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
634            { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
635            { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
636            { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
637            { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
638            { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
639            { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
640            { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
641            { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
642            { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
643            { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
644            { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
645            { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
646            { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
647            { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
648            { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
649            { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
650            { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
651            { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
652            { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
653            { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
654            { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
655            { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
656            { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
657            { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
658            { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
659            { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
660            { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
661            { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
662            { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
663    
664            /* Partial matching. */
665            { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
666            { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
667            { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
668            { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
669            { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
670            { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
671            { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
672            { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
673    
674            /* (*MARK) verb. */
675            { MUA, 0, "a(*MARK:aa)a", "ababaa" },
676            { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
677            { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
678            { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
679            { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
680            { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
681            { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
682            { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
683            { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
684            { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
685            { MUA, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
686            { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
687            { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
688            { MUA, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
689    
690            /* (*COMMIT) verb. */
691            { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
692            { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
693            { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
694            { MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" },
695            { MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" },
696    
697          /* Deep recursion. */          /* Deep recursion. */
698          { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },          { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
699          { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },          { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
700          { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },          { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
701    
702          /* Deep recursion: Stack limit reached. */          /* Deep recursion: Stack limit reached. */
703          { MA, 0, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },          { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
704          { MA, 0, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },          { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
705          { MA, 0, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },          { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
706          { MA, 0, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },          { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
707          { MA, 0, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },          { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
708    
709          { 0, 0, NULL, NULL }          { 0, 0, NULL, NULL }
710  };  };
711    
/* Returns a private heap copy of the library's default character tables.

   tables(0) builds the copy on the first call and returns the same pointer on
   every later call. tables(non-zero) releases the copy and returns NULL.
   Working from a malloc'ed copy allows valgrind to report invalid reads and
   writes on the tables. NULL is also returned when the default tables cannot
   be obtained or the allocation fails. */
static const unsigned char *tables(int mode)
{
	/* Size of the tables in bytes. This value cannot be obtained from
	pcre_fullinfo; since this is a test program, hard-coding it is
	acceptable at the moment. */
	enum { TABLES_SIZE = 1088 };
	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables = NULL;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode != 0) {
		/* Release request. free(NULL) is a no-op, so no guard is needed. */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy != NULL)
		return tables_copy;

	/* Compile an empty pattern with no explicit tables, then ask the
	compiled pattern which tables it was given. */
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre32_free(regex);
	}
#endif
	/* Shouldn't ever happen. */
	if (default_tables == NULL)
		return NULL;

	tables_copy = (unsigned char *)malloc(TABLES_SIZE);
	if (tables_copy != NULL)
		memcpy(tables_copy, default_tables, TABLES_SIZE);
	return tables_copy;
}
774    
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the opaque argument is the
   stack itself, so it is handed back unchanged. */
static pcre_jit_stack *callback8(void *arg)
{
	pcre_jit_stack *jit_stack = (pcre_jit_stack *)arg;

	return jit_stack;
}
#endif
781    
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the opaque argument is the
   stack itself, so it is handed back unchanged. */
static pcre16_jit_stack *callback16(void *arg)
{
	pcre16_jit_stack *jit_stack = (pcre16_jit_stack *)arg;

	return jit_stack;
}
#endif
788    
#ifdef SUPPORT_PCRE32
/* JIT stack callback for the 32 bit library: the opaque argument is the
   stack itself, so it is handed back unchanged. */
static pcre32_jit_stack *callback32(void *arg)
{
	pcre32_jit_stack *jit_stack = (pcre32_jit_stack *)arg;

	return jit_stack;
}
#endif
795    
#ifdef SUPPORT_PCRE8
/* Attaches the shared 8 bit JIT stack to "extra".

   The stack is created by pcre_jit_stack_alloc(1, 1024 * 1024) the first time
   it is needed and then reused for every later call. Calling with a NULL
   "extra" frees the cached stack instead. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *jit_stack;

	if (extra != NULL) {
		if (jit_stack == NULL)
			jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		/* extra is known to be non-NULL here; the stack travels to
		callback8 as its opaque argument. */
		pcre_assign_jit_stack(extra, callback8, jit_stack);
		return;
	}

	/* Release request. */
	if (jit_stack != NULL)
		pcre_jit_stack_free(jit_stack);
	jit_stack = NULL;
}
#endif /* SUPPORT_PCRE8 */
814    
#ifdef SUPPORT_PCRE16
/* Attaches the shared 16 bit JIT stack to "extra".

   The stack is created by pcre16_jit_stack_alloc(1, 1024 * 1024) the first
   time it is needed and then reused for every later call. Calling with a NULL
   "extra" frees the cached stack instead. */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *jit_stack;

	if (extra != NULL) {
		if (jit_stack == NULL)
			jit_stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		/* extra is known to be non-NULL here; the stack travels to
		callback16 as its opaque argument. */
		pcre16_assign_jit_stack(extra, callback16, jit_stack);
		return;
	}

	/* Release request. */
	if (jit_stack != NULL)
		pcre16_jit_stack_free(jit_stack);
	jit_stack = NULL;
}
#endif /* SUPPORT_PCRE16 */
833    
#ifdef SUPPORT_PCRE32
/* Attaches the shared 32 bit JIT stack to "extra".

   The stack is created by pcre32_jit_stack_alloc(1, 1024 * 1024) the first
   time it is needed and then reused for every later call. Calling with a NULL
   "extra" frees the cached stack instead. */
static void setstack32(pcre32_extra *extra)
{
	static pcre32_jit_stack *jit_stack;

	if (extra != NULL) {
		if (jit_stack == NULL)
			jit_stack = pcre32_jit_stack_alloc(1, 1024 * 1024);
		/* extra is known to be non-NULL here; the stack travels to
		callback32 as its opaque argument. */
		pcre32_assign_jit_stack(extra, callback32, jit_stack);
		return;
	}

	/* Release request. */
	if (jit_stack != NULL)
		pcre32_jit_stack_free(jit_stack);
	jit_stack = NULL;
}
#endif /* SUPPORT_PCRE32 */
852    
853    #ifdef SUPPORT_PCRE16
854    
855    static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
856    {
857            unsigned char *iptr = (unsigned char*)input;
858            PCRE_UCHAR16 *optr = output;
859            unsigned int c;
860    
861            if (max_length == 0)
862                    return 0;
863    
864            while (*iptr && max_length > 1) {
865                    c = 0;
866                    if (offsetmap)
867                            *offsetmap++ = (int)(iptr - (unsigned char*)input);
868    
869                    if (!(*iptr & 0x80))
870                            c = *iptr++;
871                    else if (!(*iptr & 0x20)) {
872                            c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
873                            iptr += 2;
874                    } else if (!(*iptr & 0x10)) {
875                            c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
876                            iptr += 3;
877                    } else if (!(*iptr & 0x08)) {
878                            c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
879                            iptr += 4;
880                    }
881    
882                    if (c < 65536) {
883                            *optr++ = c;
884                            max_length--;
885                    } else if (max_length <= 2) {
886                            *optr = '\0';
887                            return (int)(optr - output);
888                    } else {
889                            c -= 0x10000;
890                            *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
891                            *optr++ = 0xdc00 | (c & 0x3ff);
892                            max_length -= 2;
893                            if (offsetmap)
894                                    offsetmap++;
895                    }
896            }
897            if (offsetmap)
898                    *offsetmap = (int)(iptr - (unsigned char*)input);
899            *optr = '\0';
900            return (int)(optr - output);
901    }
902    
903    static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
904    {
905            unsigned char *iptr = (unsigned char*)input;
906            PCRE_UCHAR16 *optr = output;
907    
908            if (max_length == 0)
909                    return 0;
910    
911            while (*iptr && max_length > 1) {
912                    *optr++ = *iptr++;
913                    max_length--;
914            }
915            *optr = '\0';
916            return (int)(optr - output);
917    }
918    
919    #define REGTEST_MAX_LENGTH16 4096
920    static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
921    static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
922    
923    #endif /* SUPPORT_PCRE16 */
924    
925    #ifdef SUPPORT_PCRE32
926    
927    static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
928    {
929            unsigned char *iptr = (unsigned char*)input;
930            PCRE_UCHAR32 *optr = output;
931            unsigned int c;
932    
933            if (max_length == 0)
934                    return 0;
935    
936            while (*iptr && max_length > 1) {
937                    c = 0;
938                    if (offsetmap)
939                            *offsetmap++ = (int)(iptr - (unsigned char*)input);
940    
941                    if (!(*iptr & 0x80))
942                            c = *iptr++;
943                    else if (!(*iptr & 0x20)) {
944                            c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
945                            iptr += 2;
946                    } else if (!(*iptr & 0x10)) {
947                            c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
948                            iptr += 3;
949                    } else if (!(*iptr & 0x08)) {
950                            c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
951                            iptr += 4;
952                    }
953    
954                    *optr++ = c;
955                    max_length--;
956            }
957            if (offsetmap)
958                    *offsetmap = (int)(iptr - (unsigned char*)input);
959            *optr = 0;
960            return (int)(optr - output);
961    }
962    
963    static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
964    {
965            unsigned char *iptr = (unsigned char*)input;
966            PCRE_UCHAR32 *optr = output;
967    
968            if (max_length == 0)
969                    return 0;
970    
971            while (*iptr && max_length > 1) {
972                    *optr++ = *iptr++;
973                    max_length--;
974            }
975            *optr = '\0';
976            return (int)(optr - output);
977    }
978    
979    #define REGTEST_MAX_LENGTH32 4096
980    static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
981    static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
982    
983    #endif /* SUPPORT_PCRE32 */
984    
/* Reports whether the NUL-terminated string "input" is pure 7 bit ASCII.
   Returns 1 when every byte is at most 127 (the empty string included) and 0
   as soon as a byte with the top bit set is found. */
static int check_ascii(const char *input)
{
	const unsigned char *ptr;

	/* Bytes are read as unsigned char so that values above 127 are not seen
	as negative where plain char is signed. The cast keeps the const
	qualifier of the input. */
	for (ptr = (const unsigned char *)input; *ptr != 0; ptr++) {
		if (*ptr > 127)
			return 0;
	}
	return 1;
}
995    
996  static int regression_tests(void)  static int regression_tests(void)
997  {  {
         pcre *re;  
998          struct regression_test_case *current = regression_test_cases;          struct regression_test_case *current = regression_test_cases;
999          const char *error;          const char *error;
1000          pcre_extra *extra;          char *cpu_info;
         int utf8 = 0, ucp = 0;  
         int ovector1[32];  
         int ovector2[32];  
         int return_value1, return_value2;  
1001          int i, err_offs;          int i, err_offs;
1002          int total = 0, succesful = 0;          int is_successful, is_ascii_pattern, is_ascii_input;
1003            int total = 0;
1004            int successful = 0;
1005            int successful_row = 0;
1006          int counter = 0;          int counter = 0;
1007          int disabled_flags = PCRE_BUG;          int study_mode;
1008            int utf = 0, ucp = 0;
1009            int disabled_flags = 0;
1010    #ifdef SUPPORT_PCRE8
1011            pcre *re8;
1012            pcre_extra *extra8;
1013            pcre_extra dummy_extra8;
1014            int ovector8_1[32];
1015            int ovector8_2[32];
1016            int return_value8[2];
1017            unsigned char *mark8_1, *mark8_2;
1018    #endif
1019    #ifdef SUPPORT_PCRE16
1020            pcre16 *re16;
1021            pcre16_extra *extra16;
1022            pcre16_extra dummy_extra16;
1023            int ovector16_1[32];
1024            int ovector16_2[32];
1025            int return_value16[2];
1026            PCRE_UCHAR16 *mark16_1, *mark16_2;
1027            int length16;
1028    #endif
1029    #ifdef SUPPORT_PCRE32
1030            pcre32 *re32;
1031            pcre32_extra *extra32;
1032            pcre32_extra dummy_extra32;
1033            int ovector32_1[32];
1034            int ovector32_2[32];
1035            int return_value32[2];
1036            PCRE_UCHAR32 *mark32_1, *mark32_2;
1037            int length32;
1038    #endif
1039    
1040          /* This test compares the behaviour of interpreter and JIT. Although disabling          /* This test compares the behaviour of interpreter and JIT. Although disabling
1041          utf8 or ucp may make tests fail, if the pcre_exec result is the SAME, it is          utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1042          still considered successful from pcre_jit_test point of view. */          still considered successful from pcre_jit_test point of view. */
1043    
1044          pcre_config(PCRE_CONFIG_UTF8, &utf8);  #if defined SUPPORT_PCRE8
1045            pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1046    #elif defined SUPPORT_PCRE16
1047            pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1048    #elif defined SUPPORT_PCRE32
1049            pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1050    #endif
1051    
1052            printf("Running JIT regression tests\n");
1053            printf("  target CPU of SLJIT compiler: %s\n", cpu_info);
1054    
1055    #if defined SUPPORT_PCRE8
1056            pcre_config(PCRE_CONFIG_UTF8, &utf);
1057          pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);          pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1058          if (!utf8)  #elif defined SUPPORT_PCRE16
1059                  disabled_flags |= PCRE_UTF8;          pcre16_config(PCRE_CONFIG_UTF16, &utf);
1060            pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1061    #elif defined SUPPORT_PCRE16
1062            pcre32_config(PCRE_CONFIG_UTF32, &utf);
1063            pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1064    #endif
1065    
1066            if (!utf)
1067                    disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1068          if (!ucp)          if (!ucp)
1069                  disabled_flags |= PCRE_UCP;                  disabled_flags |= PCRE_UCP;
1070    #ifdef SUPPORT_PCRE8
1071            printf("  in  8 bit mode with UTF-8  %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1072    #endif
1073    #ifdef SUPPORT_PCRE16
1074            printf("  in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1075    #endif
1076    #ifdef SUPPORT_PCRE32
1077            printf("  in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1078    #endif
1079    
         printf("Running JIT regression tests with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp ? "enabled" : "disabled");  
1080          while (current->pattern) {          while (current->pattern) {
1081                  /* printf("\nPattern: %s :\n", current->pattern); */                  /* printf("\nPattern: %s :\n", current->pattern); */
1082                  total++;                  total++;
1083                    if (current->start_offset & F_PROPERTY) {
1084                            is_ascii_pattern = 0;
1085                            is_ascii_input = 0;
1086                    } else {
1087                            is_ascii_pattern = check_ascii(current->pattern);
1088                            is_ascii_input = check_ascii(current->input);
1089                    }
1090    
1091                    if (current->flags & PCRE_PARTIAL_SOFT)
1092                            study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1093                    else if (current->flags & PCRE_PARTIAL_HARD)
1094                            study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1095                    else
1096                            study_mode = PCRE_STUDY_JIT_COMPILE;
1097                  error = NULL;                  error = NULL;
1098                  re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags), &error, &err_offs, NULL);  #ifdef SUPPORT_PCRE8
1099                    re8 = NULL;
1100                  if (!re) {                  if (!(current->start_offset & F_NO8))
1101                          if (utf8 && ucp)                          re8 = pcre_compile(current->pattern,
1102                                  printf("\nCannot compile pattern: %s\n", current->pattern);                                  current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1103                          else {                                  &error, &err_offs, tables(0));
1104                                  /* Some patterns cannot be compiled when either of utf8  
1105                                  or ucp is disabled. We just skip them. */                  extra8 = NULL;
1106                                  printf(".");                  if (re8) {
1107                                  succesful++;                          error = NULL;
1108                            extra8 = pcre_study(re8, study_mode, &error);
1109                            if (!extra8) {
1110                                    printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1111                                    pcre_free(re8);
1112                                    re8 = NULL;
1113                          }                          }
1114                          current++;                          else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1115                          continue;                                  printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1116                  }                                  pcre_free_study(extra8);
1117                                    pcre_free(re8);
1118                                    re8 = NULL;
1119                            }
1120                            extra8->flags |= PCRE_EXTRA_MARK;
1121                    } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1122                            printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1123    #endif
1124    #ifdef SUPPORT_PCRE16
1125                    if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1126                            convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1127                    else
1128                            copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1129    
1130                    re16 = NULL;
1131                    if (!(current->start_offset & F_NO16))
1132                            re16 = pcre16_compile(regtest_buf16,
1133                                    current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1134                                    &error, &err_offs, tables(0));
1135    
1136                    extra16 = NULL;
1137                    if (re16) {
1138                            error = NULL;
1139                            extra16 = pcre16_study(re16, study_mode, &error);
1140                            if (!extra16) {
1141                                    printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1142                                    pcre16_free(re16);
1143                                    re16 = NULL;
1144                            }
1145                            else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1146                                    printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1147                                    pcre16_free_study(extra16);
1148                                    pcre16_free(re16);
1149                                    re16 = NULL;
1150                            }
1151                            extra16->flags |= PCRE_EXTRA_MARK;
1152                    } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1153                            printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1154    #endif
1155    #ifdef SUPPORT_PCRE32
1156                    if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1157                            convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1158                    else
1159                            copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1160    
1161                    re32 = NULL;
1162                    if (!(current->start_offset & F_NO32))
1163                            re32 = pcre32_compile(regtest_buf32,
1164                                    current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1165                                    &error, &err_offs, tables(0));
1166    
1167                    extra32 = NULL;
1168                    if (re32) {
1169                            error = NULL;
1170                            extra32 = pcre32_study(re32, study_mode, &error);
1171                            if (!extra32) {
1172                                    printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1173                                    pcre32_free(re32);
1174                                    re32 = NULL;
1175                            }
1176                            if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1177                                    printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1178                                    pcre32_free_study(extra32);
1179                                    pcre32_free(re32);
1180                                    re32 = NULL;
1181                            }
1182                            extra32->flags |= PCRE_EXTRA_MARK;
1183                    } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1184                            printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1185    #endif
1186    
1187                  error = NULL;                  counter++;
1188                  extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);                  if ((counter & 0x3) != 0) {
1189                  if (!extra) {  #ifdef SUPPORT_PCRE8
1190                          printf("\nCannot study pattern: %s\n", current->pattern);                          setstack8(NULL);
1191                          current++;  #endif
1192                          continue;  #ifdef SUPPORT_PCRE16
1193                            setstack16(NULL);
1194    #endif
1195    #ifdef SUPPORT_PCRE32
1196                            setstack32(NULL);
1197    #endif
1198                  }                  }
1199    
1200                  if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {  #ifdef SUPPORT_PCRE8
1201                          printf("\nJIT compiler does not support: %s\n", current->pattern);                  return_value8[0] = -1000;
1202                          current++;                  return_value8[1] = -1000;
1203                          continue;                  for (i = 0; i < 32; ++i)
1204                            ovector8_1[i] = -2;
1205                    for (i = 0; i < 32; ++i)
1206                            ovector8_2[i] = -2;
1207                    if (re8) {
1208                            mark8_1 = NULL;
1209                            mark8_2 = NULL;
1210                            setstack8(extra8);
1211                            extra8->mark = &mark8_1;
1212                            return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1213                                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1214                            memset(&dummy_extra8, 0, sizeof(pcre_extra));
1215                            dummy_extra8.flags = PCRE_EXTRA_MARK;
1216                            dummy_extra8.mark = &mark8_2;
1217                            return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1218                                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1219                  }                  }
1220    #endif
1221    
1222                  counter++;  #ifdef SUPPORT_PCRE16
1223                  if ((counter & 0x3) != 0)                  return_value16[0] = -1000;
1224                          setstack(extra);                  return_value16[1] = -1000;
   
1225                  for (i = 0; i < 32; ++i)                  for (i = 0; i < 32; ++i)
1226                          ovector1[i] = -2;                          ovector16_1[i] = -2;
                 return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);  
   
1227                  for (i = 0; i < 32; ++i)                  for (i = 0; i < 32; ++i)
1228                          ovector2[i] = -2;                          ovector16_2[i] = -2;
1229                  return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);                  if (re16) {
1230                            mark16_1 = NULL;
1231                            mark16_2 = NULL;
1232                            setstack16(extra16);
1233                            if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1234                                    length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1235                            else
1236                                    length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1237                            extra16->mark = &mark16_1;
1238                            return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1239                                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1240                            memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1241                            dummy_extra16.flags = PCRE_EXTRA_MARK;
1242                            dummy_extra16.mark = &mark16_2;
1243                            return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1244                                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1245                    }
1246    #endif
1247    
1248                  /* If PCRE_BUG is set, just run the test, but do not compare the results.  #ifdef SUPPORT_PCRE32
1249                    return_value32[0] = -1000;
1250                    return_value32[1] = -1000;
1251                    for (i = 0; i < 32; ++i)
1252                            ovector32_1[i] = -2;
1253                    for (i = 0; i < 32; ++i)
1254                            ovector32_2[i] = -2;
1255                    if (re32) {
1256                            mark32_1 = NULL;
1257                            mark32_2 = NULL;
1258                            setstack32(extra32);
1259                            if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1260                                    length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1261                            else
1262                                    length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1263                            extra32->mark = &mark32_1;
1264                            return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1265                                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1266                            memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1267                            dummy_extra32.flags = PCRE_EXTRA_MARK;
1268                            dummy_extra32.mark = &mark32_2;
1269                            return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1270                                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1271                    }
1272    #endif
1273    
1274                    /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1275                     *        return_value8[0], return_value16[0], return_value32[0],
1276                     *        ovector8_1[0], ovector8_1[1],
1277                     *        ovector16_1[0], ovector16_1[1],
1278                     *        ovector32_1[0], ovector32_1[1],
1279                     *        (current->flags & PCRE_CASELESS) ? "C" : ""); */
1280    
1281                    /* If F_DIFF is set, just run the test, but do not compare the results.
1282                  Segfaults can still be captured. */                  Segfaults can still be captured. */
                 if (!(current->flags & PCRE_BUG)) {  
                         if (return_value1 != return_value2) {  
                                 printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);  
                                 current++;  
                                 continue;  
                         }  
1283    
1284                          if (return_value1 >= 0) {                  is_successful = 1;
1285                                  return_value1 *= 2;                  if (!(current->start_offset & F_DIFF)) {
1286                                  err_offs = 0;  #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1287                                  for (i = 0; i < return_value1; ++i)                          if (!(current->start_offset & F_FORCECONV)) {
1288                                          if (ovector1[i] != ovector2[i]) {                                  int return_value;
1289                                                  printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);  
1290                                                  err_offs = 1;                                  /* All results must be the same. */
1291    #ifdef SUPPORT_PCRE8
1292                                    if ((return_value = return_value8[0]) != return_value8[1]) {
1293                                            printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1294                                                    return_value8[0], return_value8[1], total, current->pattern, current->input);
1295                                            is_successful = 0;
1296                                    } else
1297    #endif
1298    #ifdef SUPPORT_PCRE16
1299                                    if ((return_value = return_value16[0]) != return_value16[1]) {
1300                                            printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1301                                                    return_value16[0], return_value16[1], total, current->pattern, current->input);
1302                                            is_successful = 0;
1303                                    } else
1304    #endif
1305    #ifdef SUPPORT_PCRE32
1306                                    if ((return_value = return_value32[0]) != return_value32[1]) {
1307                                            printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1308                                                    return_value32[0], return_value32[1], total, current->pattern, current->input);
1309                                            is_successful = 0;
1310                                    } else
1311    #endif
1312    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1313                                    if (return_value8[0] != return_value16[0]) {
1314                                            printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1315                                                    return_value8[0], return_value16[0],
1316                                                    total, current->pattern, current->input);
1317                                            is_successful = 0;
1318                                    } else
1319    #endif
1320    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1321                                    if (return_value8[0] != return_value32[0]) {
1322                                            printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1323                                                    return_value8[0], return_value32[0],
1324                                                    total, current->pattern, current->input);
1325                                            is_successful = 0;
1326                                    } else
1327    #endif
1328    #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1329                                    if (return_value16[0] != return_value32[0]) {
1330                                            printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1331                                                    return_value16[0], return_value32[0],
1332                                                    total, current->pattern, current->input);
1333                                            is_successful = 0;
1334                                    } else
1335    #endif
1336                                    if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1337                                            if (return_value == PCRE_ERROR_PARTIAL) {
1338                                                    return_value = 2;
1339                                            } else {
1340                                                    return_value *= 2;
1341                                          }                                          }
1342                                  if (err_offs) {  #ifdef SUPPORT_PCRE8
1343                                          current++;                                          return_value8[0] = return_value;
1344                                          continue;  #endif
1345    #ifdef SUPPORT_PCRE16
1346                                            return_value16[0] = return_value;
1347    #endif
1348    #ifdef SUPPORT_PCRE32
1349                                            return_value32[0] = return_value;
1350    #endif
1351                                            /* Transform back the results. */
1352                                            if (current->flags & PCRE_UTF8) {
1353    #ifdef SUPPORT_PCRE16
1354                                                    for (i = 0; i < return_value; ++i) {
1355                                                            if (ovector16_1[i] >= 0)
1356                                                                    ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1357                                                            if (ovector16_2[i] >= 0)
1358                                                                    ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1359                                                    }
1360    #endif
1361    #ifdef SUPPORT_PCRE32
1362                                                    for (i = 0; i < return_value; ++i) {
1363                                                            if (ovector32_1[i] >= 0)
1364                                                                    ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1365                                                            if (ovector32_2[i] >= 0)
1366                                                                    ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1367                                                    }
1368    #endif
1369                                            }
1370    
1371                                            for (i = 0; i < return_value; ++i) {
1372    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1373                                                    if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1374                                                            printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1375                                                                    i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1376                                                                    total, current->pattern, current->input);
1377                                                            is_successful = 0;
1378                                                    }
1379    #endif
1380    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1381                                                    if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1382                                                            printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1383                                                                    i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1384                                                                    total, current->pattern, current->input);
1385                                                            is_successful = 0;
1386                                                    }
1387    #endif
1388    #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1389                                                    if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1390                                                            printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1391                                                                    i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1392                                                                    total, current->pattern, current->input);
1393                                                            is_successful = 0;
1394                                                    }
1395    #endif
1396                                            }
1397                                    }
1398                            } else
1399    #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1400                            {
1401                                    /* Only the JIT and interpreter results of the same bit width (8, 16 or 32) must be equal. */
1402    #ifdef SUPPORT_PCRE8
1403                                    if (return_value8[0] != return_value8[1]) {
1404                                            printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1405                                                    return_value8[0], return_value8[1], total, current->pattern, current->input);
1406                                            is_successful = 0;
1407                                    } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1408                                            if (return_value8[0] == PCRE_ERROR_PARTIAL)
1409                                                    return_value8[0] = 2;
1410                                            else
1411                                                    return_value8[0] *= 2;
1412    
1413                                            for (i = 0; i < return_value8[0]; ++i)
1414                                                    if (ovector8_1[i] != ovector8_2[i]) {
1415                                                            printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1416                                                                    i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1417                                                            is_successful = 0;
1418                                                    }
1419                                    }
1420    #endif
1421    
1422    #ifdef SUPPORT_PCRE16
1423                                    if (return_value16[0] != return_value16[1]) {
1424                                            printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1425                                                    return_value16[0], return_value16[1], total, current->pattern, current->input);
1426                                            is_successful = 0;
1427                                    } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1428                                            if (return_value16[0] == PCRE_ERROR_PARTIAL)
1429                                                    return_value16[0] = 2;
1430                                            else
1431                                                    return_value16[0] *= 2;
1432    
1433                                            for (i = 0; i < return_value16[0]; ++i)
1434                                                    if (ovector16_1[i] != ovector16_2[i]) {
1435                                                            printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1436                                                                    i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1437                                                            is_successful = 0;
1438                                                    }
1439                                    }
1440    #endif
1441    
1442    #ifdef SUPPORT_PCRE32
1443                                    if (return_value32[0] != return_value32[1]) {
1444                                            printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1445                                                    return_value32[0], return_value32[1], total, current->pattern, current->input);
1446                                            is_successful = 0;
1447                                    } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1448                                            if (return_value32[0] == PCRE_ERROR_PARTIAL)
1449                                                    return_value32[0] = 2;
1450                                            else
1451                                                    return_value32[0] *= 2;
1452    
1453                                            for (i = 0; i < return_value32[0]; ++i)
1454                                                    if (ovector32_1[i] != ovector32_2[i]) {
1455                                                            printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1456                                                                    i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1457                                                            is_successful = 0;
1458                                                    }
1459                                    }
1460    #endif
1461                            }
1462                    }
1463    
1464                    if (is_successful) {
1465    #ifdef SUPPORT_PCRE8
1466                            if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1467                                    if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1468                                            printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1469                                                    total, current->pattern, current->input);
1470                                            is_successful = 0;
1471                                    }
1472    
1473                                    if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1474                                            printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1475                                                    total, current->pattern, current->input);
1476                                            is_successful = 0;
1477                                    }
1478                            }
1479    #endif
1480    #ifdef SUPPORT_PCRE16
1481                            if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1482                                    if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1483                                            printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1484                                                    total, current->pattern, current->input);
1485                                            is_successful = 0;
1486                                    }
1487    
1488                                    if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1489                                            printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1490                                                    total, current->pattern, current->input);
1491                                            is_successful = 0;
1492                                  }                                  }
1493                          }                          }
1494    #endif
1495    #ifdef SUPPORT_PCRE32
1496                            if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1497                                    if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1498                                            printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1499                                                    total, current->pattern, current->input);
1500                                            is_successful = 0;
1501                                    }
1502    
1503                                    if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1504                                            printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1505                                                    total, current->pattern, current->input);
1506                                            is_successful = 0;
1507                                    }
1508                            }
1509    #endif
1510                    }
1511    
1512                    if (is_successful) {
1513    #ifdef SUPPORT_PCRE8
1514                            if (mark8_1 != mark8_2) {
1515                                    printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1516                                            total, current->pattern, current->input);
1517                                    is_successful = 0;
1518                            }
1519    #endif
1520    #ifdef SUPPORT_PCRE16
1521                            if (mark16_1 != mark16_2) {
1522                                    printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1523                                            total, current->pattern, current->input);
1524                                    is_successful = 0;
1525                            }
1526    #endif
1527    #ifdef SUPPORT_PCRE32
1528                            if (mark32_1 != mark32_2) {
1529                                    printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1530                                            total, current->pattern, current->input);
1531                                    is_successful = 0;
1532                            }
1533    #endif
1534                  }                  }
1535    
1536                  pcre_free_study(extra);  #ifdef SUPPORT_PCRE8
1537                  pcre_free(re);                  if (re8) {
1538                            pcre_free_study(extra8);
1539                            pcre_free(re8);
1540                    }
1541    #endif
1542    #ifdef SUPPORT_PCRE16
1543                    if (re16) {
1544                            pcre16_free_study(extra16);
1545                            pcre16_free(re16);
1546                    }
1547    #endif
1548    #ifdef SUPPORT_PCRE32
1549                    if (re32) {
1550                            pcre32_free_study(extra32);
1551                            pcre32_free(re32);
1552                    }
1553    #endif
1554    
1555                    if (is_successful) {
1556                            successful++;
1557                            successful_row++;
1558                            printf(".");
1559                            if (successful_row >= 60) {
1560                                    successful_row = 0;
1561                                    printf("\n");
1562                            }
1563                    } else
1564                            successful_row = 0;
1565    
                 /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */  
                 printf(".");  
1566                  fflush(stdout);                  fflush(stdout);
1567                  current++;                  current++;
                 succesful++;  
1568          }          }
1569            tables(1);
1570    #ifdef SUPPORT_PCRE8
1571            setstack8(NULL);
1572    #endif
1573    #ifdef SUPPORT_PCRE16
1574            setstack16(NULL);
1575    #endif
1576    #ifdef SUPPORT_PCRE32
1577            setstack32(NULL);
1578    #endif
1579    
1580          if (total == succesful) {          if (total == successful) {
1581                  printf("\nAll JIT regression tests are successfully passed.\n");                  printf("\nAll JIT regression tests are successfully passed.\n");
1582                  return 0;                  return 0;
1583          } else {          } else {
1584                  printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);                  printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1585                  return 1;                  return 1;
1586          }          }
1587  }  }

Legend:
Removed from v.736  
changed lines
  Added in v.1055

  ViewVC Help
Powered by ViewVC 1.1.5