/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 168 by ph10, Tue May 29 15:18:18 2007 UTC | revision 1041 by ph10, Sun Sep 16 10:16:27 2012 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* PCRE is a library of functions to support regular expressions whose syntax  /* PCRE is a library of functions to support regular expressions whose syntax
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language (but see
7    below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40    
   
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45    
46    
47    /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
48    the performance of his patterns greatly. I could not use it as it stood, as it
49    was not thread safe, and made assumptions about pattern sizes. Also, it caused
50    test 7 to loop, and test 9 to crash with a segfault.
51    
52    The issue is the check for duplicate states, which is done by a simple linear
53    search up the state list. (Grep for "duplicate" below to find the code.) For
54    many patterns, there will never be many states active at one time, so a simple
55    linear search is fine. In patterns that have many active states, it might be a
56    bottleneck. The suggested code used an indexing scheme to remember which states
57    had previously been used for each character, and avoided the linear search when
58    it knew there was no chance of a duplicate. This was implemented when adding
59    states to the state lists.
60    
61    I wrote some thread-safe, not-limited code to try something similar at the time
62    of checking for duplicates (instead of when adding states), using index vectors
63    on the stack. It did give a 13% improvement with one specially constructed
64    pattern for certain subject strings, but on other strings and on many of the
65    simpler patterns in the test suite it did worse. The major problem, I think,
66    was the extra time to initialize the index. This had to be done for each call
67    of internal_dfa_exec(). (The supplied patch used a static vector, initialized
68    only once - I suspect this was the cause of the problems with the tests.)
69    
70    Overall, I concluded that the gains in some cases did not outweigh the losses
71    in others, so I abandoned this code. */
72    
73    
74    
75    #ifdef HAVE_CONFIG_H
76    #include "config.h"
77    #endif
78    
79  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
80  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
81  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 56  applications. */ Line 88  applications. */
88  #define SP "                   "  #define SP "                   "
89    
90    
   
91  /*************************************************  /*************************************************
92  *      Code parameters and static tables         *  *      Code parameters and static tables         *
93  *************************************************/  *************************************************/
94    
95  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
96  into others, under special conditions. A gap of 20 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
97  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
98    never stored, so we push them well clear of the normal opcodes. */
99    
100  #define OP_PROP_EXTRA 100  #define OP_PROP_EXTRA       300
101  #define OP_EXTUNI_EXTRA 120  #define OP_EXTUNI_EXTRA     320
102  #define OP_ANYNL_EXTRA 140  #define OP_ANYNL_EXTRA      340
103    #define OP_HSPACE_EXTRA     360
104    #define OP_VSPACE_EXTRA     380
105    
106    
107  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
108  character that is to be tested in some way. This makes is possible to  character that is to be tested in some way. This makes it possible to
109  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
110  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
111  small value. ***NOTE*** If the start of this table is modified, the two tables  small value. Non-zero values in the table are the offsets from the opcode where
112  that follow must also be modified. */  the character is to be found. ***NOTE*** If the start of this table is
113    modified, the three tables that follow must also be modified. */
114    
115  static uschar coptable[] = {  static const pcre_uint8 coptable[] = {
116    0,                             /* End                                    */    0,                             /* End                                    */
117    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
118    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
119    0, 0,                          /* Any, Anybyte                           */    0, 0, 0,                       /* Any, AllAny, Anybyte                   */
120    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */    0, 0,                          /* \P, \p                                 */
121    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
122      0,                             /* \X                                     */
123      0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
124    1,                             /* Char                                   */    1,                             /* Char                                   */
125    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
126    1,                             /* not                                    */    1,                             /* not                                    */
127      1,                             /* noti                                   */
128    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
129    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
130    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
131    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
132      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
133      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
134      1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
135      1+IMM2_SIZE,                   /* exact I                                */
136      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
137    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
138    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
139    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
140    1, 1, 1, 3,                    /* NOT *+, ++, ?+, updo+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
141      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
142      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
143      1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
144      1+IMM2_SIZE,                   /* NOT exact I                            */
145      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
146    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
147    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
148    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
149    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
150      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
151    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
152    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
153    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 106  static uschar coptable[] = { Line 155  static uschar coptable[] = {
155    0,                             /* NCLASS                                 */    0,                             /* NCLASS                                 */
156    0,                             /* XCLASS - variable length               */    0,                             /* XCLASS - variable length               */
157    0,                             /* REF                                    */    0,                             /* REF                                    */
158      0,                             /* REFI                                   */
159    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
160    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
161    0,                             /* Alt                                    */    0,                             /* Alt                                    */
162    0,                             /* Ket                                    */    0,                             /* Ket                                    */
163    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
164    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
165      0,                             /* KetRpos                                */
166      0,                             /* Reverse                                */
167    0,                             /* Assert                                 */    0,                             /* Assert                                 */
168    0,                             /* Assert not                             */    0,                             /* Assert not                             */
169    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
170    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
171      0, 0,                          /* ONCE, ONCE_NC                          */
172      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
173      0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
174      0, 0,                          /* CREF, NCREF                            */
175      0, 0,                          /* RREF, NRREF                            */
176      0,                             /* DEF                                    */
177      0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
178      0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
179      0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
180      0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
181      0, 0                           /* CLOSE, SKIPZERO                        */
182    };
183    
184    /* This table identifies those opcodes that inspect a character. It is used to
185    remember the fact that a character could have been inspected when the end of
186    the subject is reached. ***NOTE*** If the start of this table is modified, the
187    two tables that follow must also be modified. */
188    
189    static const pcre_uint8 poptable[] = {
190      0,                             /* End                                    */
191      0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
192      1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
193      1, 1, 1,                       /* Any, AllAny, Anybyte                   */
194      1, 1,                          /* \P, \p                                 */
195      1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
196      1,                             /* \X                                     */
197      0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
198      1,                             /* Char                                   */
199      1,                             /* Chari                                  */
200      1,                             /* not                                    */
201      1,                             /* noti                                   */
202      /* Positive single-char repeats                                          */
203      1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
204      1, 1, 1,                       /* upto, minupto, exact                   */
205      1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
206      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
207      1, 1, 1,                       /* upto I, minupto I, exact I             */
208      1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
209      /* Negative single-char repeats - only for chars < 256                   */
210      1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
211      1, 1, 1,                       /* NOT upto, minupto, exact               */
212      1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
213      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
214      1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
215      1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
216      /* Positive type repeats                                                 */
217      1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
218      1, 1, 1,                       /* Type upto, minupto, exact              */
219      1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
220      /* Character class & ref repeats                                         */
221      1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
222      1, 1,                          /* CRRANGE, CRMINRANGE                    */
223      1,                             /* CLASS                                  */
224      1,                             /* NCLASS                                 */
225      1,                             /* XCLASS - variable length               */
226      0,                             /* REF                                    */
227      0,                             /* REFI                                   */
228      0,                             /* RECURSE                                */
229      0,                             /* CALLOUT                                */
230      0,                             /* Alt                                    */
231      0,                             /* Ket                                    */
232      0,                             /* KetRmax                                */
233      0,                             /* KetRmin                                */
234      0,                             /* KetRpos                                */
235    0,                             /* Reverse                                */    0,                             /* Reverse                                */
236    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0,                             /* Assert                                 */
237    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0,                             /* Assert not                             */
238    0,                             /* CREF                                   */    0,                             /* Assert behind                          */
239    0,                             /* RREF                                   */    0,                             /* Assert behind not                      */
240      0, 0,                          /* ONCE, ONCE_NC                          */
241      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
242      0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
243      0, 0,                          /* CREF, NCREF                            */
244      0, 0,                          /* RREF, NRREF                            */
245    0,                             /* DEF                                    */    0,                             /* DEF                                    */
246    0, 0                           /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
247      0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
248      0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
249      0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
250      0, 0                           /* CLOSE, SKIPZERO                        */
251  };  };
252    
253  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
254  and \w */  and \w */
255    
256  static uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
257    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
258    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
259    ctype_space, ctype_space,    ctype_space, ctype_space,
260    ctype_word,  ctype_word,    ctype_word,  ctype_word,
261    0                               /* OP_ANY */    0, 0                            /* OP_ANY, OP_ALLANY */
262  };  };
263    
264  static uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
265    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
266    ctype_digit, 0,    ctype_digit, 0,
267    ctype_space, 0,    ctype_space, 0,
268    ctype_word,  0,    ctype_word,  0,
269    1                               /* OP_ANY */    1, 1                            /* OP_ANY, OP_ALLANY */
270  };  };
271    
272    
# Line 153  these structures in, is a vector of ints Line 278  these structures in, is a vector of ints
278  typedef struct stateblock {  typedef struct stateblock {
279    int offset;                     /* Offset to opcode */    int offset;                     /* Offset to opcode */
280    int count;                      /* Count for repeats */    int count;                      /* Count for repeats */
   int ims;                        /* ims flag bits */  
281    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
282  } stateblock;  } stateblock;
283    
284  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
285    
286    
287  #ifdef DEBUG  #ifdef PCRE_DEBUG
288  /*************************************************  /*************************************************
289  *             Print character string             *  *             Print character string             *
290  *************************************************/  *************************************************/
# Line 176  Returns:       nothing Line 300  Returns:       nothing
300  */  */
301    
302  static void  static void
303  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
304  {  {
305  int c;  int c;
306  while (length-- > 0)  while (length-- > 0)
# Line 209  Arguments: Line 333  Arguments:
333    offsetcount       size of same    offsetcount       size of same
334    workspace         vector of workspace    workspace         vector of workspace
335    wscount           size of same    wscount           size of same
   ims               the current ims flags  
336    rlevel            function call recursion level    rlevel            function call recursion level
   recursing         regex recursive call level  
337    
338  Returns:            > 0 =>  Returns:            > 0 => number of match offset pairs placed in offsets
339                      = 0 =>                      = 0 => offsets overflowed; longest matches are present
340                       -1 => failed to match                       -1 => failed to match
341                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
342    
# Line 226  for the current character, one for the f Line 348  for the current character, one for the f
348      { \      { \
349      next_active_state->offset = (x); \      next_active_state->offset = (x); \
350      next_active_state->count  = (y); \      next_active_state->count  = (y); \
     next_active_state->ims    = ims; \  
351      next_active_state++; \      next_active_state++; \
352      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
353      } \      } \
# Line 237  for the current character, one for the f Line 358  for the current character, one for the f
358      { \      { \
359      next_active_state->offset = (x); \      next_active_state->offset = (x); \
360      next_active_state->count  = (y); \      next_active_state->count  = (y); \
     next_active_state->ims    = ims; \  
361      next_active_state->data   = (z); \      next_active_state->data   = (z); \
362      next_active_state++; \      next_active_state++; \
363      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
# Line 249  for the current character, one for the f Line 369  for the current character, one for the f
369      { \      { \
370      next_new_state->offset = (x); \      next_new_state->offset = (x); \
371      next_new_state->count  = (y); \      next_new_state->count  = (y); \
     next_new_state->ims    = ims; \  
372      next_new_state++; \      next_new_state++; \
373      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
374      } \      } \
# Line 260  for the current character, one for the f Line 379  for the current character, one for the f
379      { \      { \
380      next_new_state->offset = (x); \      next_new_state->offset = (x); \
381      next_new_state->count  = (y); \      next_new_state->count  = (y); \
     next_new_state->ims    = ims; \  
382      next_new_state->data   = (z); \      next_new_state->data   = (z); \
383      next_new_state++; \      next_new_state++; \
384      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
385          (x), (y), (z), __LINE__)); \
386      } \      } \
387    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
388    
# Line 272  for the current character, one for the f Line 391  for the current character, one for the f
391  static int  static int
392  internal_dfa_exec(  internal_dfa_exec(
393    dfa_match_data *md,    dfa_match_data *md,
394    const uschar *this_start_code,    const pcre_uchar *this_start_code,
395    const uschar *current_subject,    const pcre_uchar *current_subject,
396    int start_offset,    int start_offset,
397    int *offsets,    int *offsets,
398    int offsetcount,    int offsetcount,
399    int *workspace,    int *workspace,
400    int wscount,    int wscount,
401    int ims,    int  rlevel)
   int  rlevel,  
   int  recursing)  
402  {  {
403  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
404  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
405    
406  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
407  const uschar *ptr;  const pcre_uchar *ptr;
408  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
409    
410    dfa_recursion_info new_recursive;
411    
412  int active_count, new_count, match_count;  int active_count, new_count, match_count;
413    
414  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
415  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
416    
417  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
418  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
419  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
420    
421  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
422  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
423  #else  #else
424  BOOL utf8 = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426    
427    BOOL reset_could_continue = FALSE;
428    
429  rlevel++;  rlevel++;
430  offsetcount &= (-2);  offsetcount &= (-2);
431    
# Line 313  wscount = (wscount - (wscount % (INTS_PE Line 434  wscount = (wscount - (wscount % (INTS_PE
434            (2 * INTS_PER_STATEBLOCK);            (2 * INTS_PER_STATEBLOCK);
435    
436  DPRINTF(("\n%.*s---------------------\n"  DPRINTF(("\n%.*s---------------------\n"
437    "%.*sCall to internal_dfa_exec f=%d r=%d\n",    "%.*sCall to internal_dfa_exec f=%d\n",
438    rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));    rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
439    
440  ctypes = md->tables + ctypes_offset;  ctypes = md->tables + ctypes_offset;
441  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
# Line 327  next_new_state = new_states = active_sta Line 448  next_new_state = new_states = active_sta
448  new_count = 0;  new_count = 0;
449    
450  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
451    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
452        *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
453        ? IMM2_SIZE:0);
454    
455  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
456  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 355  if (*first_op == OP_REVERSE) Line 478  if (*first_op == OP_REVERSE)
478    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
479    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
480    
481  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
482    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
483    
484    if (utf8)    if (utf)
485      {      {
486      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
487        {        {
488        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
489        current_subject--;        current_subject--;
490        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
              (*current_subject & 0xc0) == 0x80)  
         current_subject--;  
491        }        }
492      }      }
493    else    else
# Line 376  if (*first_op == OP_REVERSE) Line 497  if (*first_op == OP_REVERSE)
497    
498      {      {
499      gone_back = (current_subject - max_back < start_subject)?      gone_back = (current_subject - max_back < start_subject)?
500        current_subject - start_subject : max_back;        (int)(current_subject - start_subject) : max_back;
501      current_subject -= gone_back;      current_subject -= gone_back;
502      }      }
503    
504      /* Save the earliest consulted character */
505    
506      if (current_subject < md->start_used_ptr)
507        md->start_used_ptr = current_subject;
508    
509    /* Now we can process the individual branches. */    /* Now we can process the individual branches. */
510    
511    end_code = this_start_code;    end_code = this_start_code;
# Line 388  if (*first_op == OP_REVERSE) Line 514  if (*first_op == OP_REVERSE)
514      int back = GET(end_code, 2+LINK_SIZE);      int back = GET(end_code, 2+LINK_SIZE);
515      if (back <= gone_back)      if (back <= gone_back)
516        {        {
517        int bstate = end_code - start_code + 2 + 2*LINK_SIZE;        int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
518        ADD_NEW_DATA(-bstate, 0, gone_back - back);        ADD_NEW_DATA(-bstate, 0, gone_back - back);
519        }        }
520      end_code += GET(end_code, 1);      end_code += GET(end_code, 1);
# Line 421  else Line 547  else
547    else    else
548      {      {
549      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
550        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
551            *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
552            ? IMM2_SIZE:0);
553      do      do
554        {        {
555        ADD_NEW(end_code - start_code + length, 0);        ADD_NEW((int)(end_code - start_code + length), 0);
556        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
557        length = 1 + LINK_SIZE;        length = 1 + LINK_SIZE;
558        }        }
# Line 434  else Line 562  else
562    
563  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
564    
565  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
566    
567  /* Loop for scanning the subject */  /* Loop for scanning the subject */
568    
# Line 444  for (;;) Line 572  for (;;)
572    int i, j;    int i, j;
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    unsigned int c, d;
575      int forced_fail = 0;
576      BOOL partial_newline = FALSE;
577      BOOL could_continue = reset_could_continue;
578      reset_could_continue = FALSE;
579    
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
# Line 457  for (;;) Line 589  for (;;)
589    workspace[0] ^= 1;              /* Remember for the restarting feature */    workspace[0] ^= 1;              /* Remember for the restarting feature */
590    workspace[1] = active_count;    workspace[1] = active_count;
591    
592  #ifdef DEBUG  #ifdef PCRE_DEBUG
593    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
594    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
595    printf("\"\n");    printf("\"\n");
596    
597    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 479  for (;;) Line 611  for (;;)
611    
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
615  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
616      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
617  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
618      c = *ptr;      c = *ptr;
619      }      }
620    else    else
# Line 499  for (;;) Line 631  for (;;)
631    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
632      {      {
633      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
634      const uschar *code;      BOOL caseless = FALSE;
635        const pcre_uchar *code;
636      int state_offset = current_state->offset;      int state_offset = current_state->offset;
637      int count, codevalue;      int count, codevalue, rrc;
 #ifdef SUPPORT_UCP  
     int chartype, script;  
 #endif  
638    
639  #ifdef DEBUG  #ifdef PCRE_DEBUG
640      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
641      if (clen == 0) printf("EOL\n");      if (clen == 0) printf("EOL\n");
642        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
643          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
644  #endif  #endif
645    
     /* This variable is referred to implicity in the ADD_xxx macros. */  
   
     ims = current_state->ims;  
   
646      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
647      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
648      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
649        state, arrange for it to be passed on. */
650    
651      if (state_offset < 0)      if (state_offset < 0)
652        {        {
# Line 528  for (;;) Line 655  for (;;)
655          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
656          ADD_NEW_DATA(state_offset, current_state->count,          ADD_NEW_DATA(state_offset, current_state->count,
657            current_state->data - 1);            current_state->data - 1);
658            if (could_continue) reset_could_continue = TRUE;
659          continue;          continue;
660          }          }
661        else        else
# Line 536  for (;;) Line 664  for (;;)
664          }          }
665        }        }
666    
667      /* Check for a duplicate state with the same count, and skip if found. */      /* Check for a duplicate state with the same count, and skip if found.
668        See the note at the head of this module about the possibility of improving
669        performance here. */
670    
671      for (j = 0; j < i; j++)      for (j = 0; j < i; j++)
672        {        {
# Line 553  for (;;) Line 683  for (;;)
683      code = start_code + state_offset;      code = start_code + state_offset;
684      codevalue = *code;      codevalue = *code;
685    
686        /* If this opcode inspects a character, but we are at the end of the
687        subject, remember the fact for use when testing for a partial match. */
688    
689        if (clen == 0 && poptable[codevalue] != 0)
690          could_continue = TRUE;
691    
692      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
693      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
694      is wrong, because sometimes zero repetitions of the subject are      is wrong, because sometimes zero repetitions of the subject are
695      permitted.      permitted.
696    
697      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
698      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long because
699      Unfortunately, we have to take special action to deal with  \P, \p, and      the values are small. We have to take special action to deal with  \P, \p,
700      \X in this case. To keep the other cases fast, convert these ones to new      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
701      opcodes. */      these ones to new opcodes. */
702    
703      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
704        {        {
705        dlen = 1;        dlen = 1;
706  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
707        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
708  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
709        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
710        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
711          {          {
# Line 580  for (;;) Line 716  for (;;)
716            case OP_PROP: codevalue += OP_PROP_EXTRA; break;            case OP_PROP: codevalue += OP_PROP_EXTRA; break;
717            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
718            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
719              case OP_NOT_HSPACE:
720              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
721              case OP_NOT_VSPACE:
722              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
723            default: break;            default: break;
724            }            }
725          }          }
# Line 595  for (;;) Line 735  for (;;)
735    
736      switch (codevalue)      switch (codevalue)
737        {        {
738    /* ========================================================================== */
739          /* These cases are never obeyed. This is a fudge that causes a compile-
740          time error if the vectors coptable or poptable, which are indexed by
741          opcode, are not the correct length. It seems to be the only way to do
742          such a check at compile time, as the sizeof() operator does not work
743          in the C preprocessor. */
744    
745          case OP_TABLE_LENGTH:
746          case OP_TABLE_LENGTH +
747            ((sizeof(coptable) == OP_TABLE_LENGTH) &&
748             (sizeof(poptable) == OP_TABLE_LENGTH)):
749          break;
750    
751  /* ========================================================================== */  /* ========================================================================== */
752        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
753        on with the next opcode. Otherwise, unless we have an empty string and        on with the next opcode. For repeating opcodes, also add the repeat
754        PCRE_NOTEMPTY is set, save the match data, shifting up all previous        state. Note that KETRPOS will always be encountered at the end of the
755          subpattern, because the possessive subpattern repeats are always handled
756          using recursive calls. Thus, it never adds any new states.
757    
758          At the end of the (sub)pattern, unless we have an empty string and
759          PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
760          start of the subject, save the match data, shifting up all previous
761        matches so we always have the longest first. */        matches so we always have the longest first. */
762    
763        case OP_KET:        case OP_KET:
764        case OP_KETRMIN:        case OP_KETRMIN:
765        case OP_KETRMAX:        case OP_KETRMAX:
766          case OP_KETRPOS:
767        if (code != end_code)        if (code != end_code)
768          {          {
769          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
# Line 613  for (;;) Line 772  for (;;)
772            ADD_ACTIVE(state_offset - GET(code, 1), 0);            ADD_ACTIVE(state_offset - GET(code, 1), 0);
773            }            }
774          }          }
775        else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)        else
776          {          {
777          if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;          if (ptr > current_subject ||
778            else if (match_count > 0 && ++match_count * 2 >= offsetcount)              ((md->moptions & PCRE_NOTEMPTY) == 0 &&
779              match_count = 0;                ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
780          count = ((match_count == 0)? offsetcount : match_count * 2) - 2;                  current_subject > start_subject + md->start_offset)))
781          if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            {
782          if (offsetcount >= 2)            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
783            {              else if (match_count > 0 && ++match_count * 2 > offsetcount)
784            offsets[0] = current_subject - start_subject;                match_count = 0;
785            offsets[1] = ptr - start_subject;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
786            DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
787              offsets[1] - offsets[0], current_subject));            if (offsetcount >= 2)
788            }              {
789          if ((md->moptions & PCRE_DFA_SHORTEST) != 0)              offsets[0] = (int)(current_subject - start_subject);
790            {              offsets[1] = (int)(ptr - start_subject);
791            DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
792              "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,                offsets[1] - offsets[0], (char *)current_subject));
793              match_count, rlevel*2-2, SP));              }
794            return match_count;            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
795                {
796                DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
797                  "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
798                  match_count, rlevel*2-2, SP));
799                return match_count;
800                }
801            }            }
802          }          }
803        break;        break;
# Line 644  for (;;) Line 809  for (;;)
809        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
810        case OP_ALT:        case OP_ALT:
811        do { code += GET(code, 1); } while (*code == OP_ALT);        do { code += GET(code, 1); } while (*code == OP_ALT);
812        ADD_ACTIVE(code - start_code, 0);        ADD_ACTIVE((int)(code - start_code), 0);
813        break;        break;
814    
815        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 652  for (;;) Line 817  for (;;)
817        case OP_SBRA:        case OP_SBRA:
818        do        do
819          {          {
820          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
821          code += GET(code, 1);          code += GET(code, 1);
822          }          }
823        while (*code == OP_ALT);        while (*code == OP_ALT);
# Line 661  for (;;) Line 826  for (;;)
826        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
827        case OP_CBRA:        case OP_CBRA:
828        case OP_SCBRA:        case OP_SCBRA:
829        ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
830        code += GET(code, 1);        code += GET(code, 1);
831        while (*code == OP_ALT)        while (*code == OP_ALT)
832          {          {
833          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);          ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
834          code += GET(code, 1);          code += GET(code, 1);
835          }          }
836        break;        break;
# Line 676  for (;;) Line 841  for (;;)
841        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
842        code += 1 + GET(code, 2);        code += 1 + GET(code, 2);
843        while (*code == OP_ALT) code += GET(code, 1);        while (*code == OP_ALT) code += GET(code, 1);
844        ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
845          break;
846    
847          /*-----------------------------------------------------------------*/
848          case OP_SKIPZERO:
849          code += 1 + GET(code, 2);
850          while (*code == OP_ALT) code += GET(code, 1);
851          ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
852        break;        break;
853    
854        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
855        case OP_CIRC:        case OP_CIRC:
856        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
           ((ims & PCRE_MULTILINE) != 0 &&  
             ptr != end_subject &&  
             WAS_NEWLINE(ptr)))  
857          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
858        break;        break;
859    
860        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
861        case OP_EOD:        case OP_CIRCM:
862        if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
863              (ptr != end_subject && WAS_NEWLINE(ptr)))
864            { ADD_ACTIVE(state_offset + 1, 0); }
865        break;        break;
866    
867        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
868        case OP_OPT:        case OP_EOD:
869        ims = code[1];        if (ptr >= end_subject)
870        ADD_ACTIVE(state_offset + 2, 0);          {
871            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
872              could_continue = TRUE;
873            else { ADD_ACTIVE(state_offset + 1, 0); }
874            }
875        break;        break;
876    
877        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 718  for (;;) Line 893  for (;;)
893    
894        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
895        case OP_ANY:        case OP_ANY:
896        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))        if (clen > 0 && !IS_NEWLINE(ptr))
897            {
898            if (ptr + 1 >= md->end_subject &&
899                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
900                NLBLOCK->nltype == NLTYPE_FIXED &&
901                NLBLOCK->nllen == 2 &&
902                c == NLBLOCK->nl[0])
903              {
904              could_continue = partial_newline = TRUE;
905              }
906            else
907              {
908              ADD_NEW(state_offset + 1, 0);
909              }
910            }
911          break;
912    
913          /*-----------------------------------------------------------------*/
914          case OP_ALLANY:
915          if (clen > 0)
916          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
917        break;        break;
918    
919        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
920        case OP_EODN:        case OP_EODN:
921        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))        if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
922            could_continue = TRUE;
923          else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
924          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
925        break;        break;
926    
# Line 732  for (;;) Line 928  for (;;)
928        case OP_DOLL:        case OP_DOLL:
929        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
930          {          {
931          if (clen == 0 ||          if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
932              (IS_NEWLINE(ptr) &&            could_continue = TRUE;
933                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)          else if (clen == 0 ||
934                ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
935                   (ptr == end_subject - md->nllen)
936              ))              ))
937            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
938            else if (ptr + 1 >= md->end_subject &&
939                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
940                     NLBLOCK->nltype == NLTYPE_FIXED &&
941                     NLBLOCK->nllen == 2 &&
942                     c == NLBLOCK->nl[0])
943              {
944              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
945                {
946                reset_could_continue = TRUE;
947                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
948                }
949              else could_continue = partial_newline = TRUE;
950              }
951            }
952          break;
953    
954          /*-----------------------------------------------------------------*/
955          case OP_DOLLM:
956          if ((md->moptions & PCRE_NOTEOL) == 0)
957            {
958            if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
959              could_continue = TRUE;
960            else if (clen == 0 ||
961                ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
962              { ADD_ACTIVE(state_offset + 1, 0); }
963            else if (ptr + 1 >= md->end_subject &&
964                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
965                     NLBLOCK->nltype == NLTYPE_FIXED &&
966                     NLBLOCK->nllen == 2 &&
967                     c == NLBLOCK->nl[0])
968              {
969              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
970                {
971                reset_could_continue = TRUE;
972                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
973                }
974              else could_continue = partial_newline = TRUE;
975              }
976          }          }
977        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
978          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
979        break;        break;
980    
# Line 769  for (;;) Line 1005  for (;;)
1005    
1006          if (ptr > start_subject)          if (ptr > start_subject)
1007            {            {
1008            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1009  #ifdef SUPPORT_UTF8            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1010            if (utf8) BACKCHAR(temp);  #ifdef SUPPORT_UTF
1011              if (utf) { BACKCHAR(temp); }
1012  #endif  #endif
1013            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
1014    #ifdef SUPPORT_UCP
1015              if ((md->poptions & PCRE_UCP) != 0)
1016                {
1017                if (d == '_') left_word = TRUE; else
1018                  {
1019                  int cat = UCD_CATEGORY(d);
1020                  left_word = (cat == ucp_L || cat == ucp_N);
1021                  }
1022                }
1023              else
1024    #endif
1025            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
1026            }            }
1027          else left_word = 0;          else left_word = FALSE;
1028    
1029          if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0;          if (clen > 0)
1030            else right_word = 0;            {
1031    #ifdef SUPPORT_UCP
1032              if ((md->poptions & PCRE_UCP) != 0)
1033                {
1034                if (c == '_') right_word = TRUE; else
1035                  {
1036                  int cat = UCD_CATEGORY(c);
1037                  right_word = (cat == ucp_L || cat == ucp_N);
1038                  }
1039                }
1040              else
1041    #endif
1042              right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
1043              }
1044            else right_word = FALSE;
1045    
1046          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
1047            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 798  for (;;) Line 1060  for (;;)
1060        if (clen > 0)        if (clen > 0)
1061          {          {
1062          BOOL OK;          BOOL OK;
1063          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1064          switch(code[1])          switch(code[1])
1065            {            {
1066            case PT_ANY:            case PT_ANY:
# Line 806  for (;;) Line 1068  for (;;)
1068            break;            break;
1069    
1070            case PT_LAMP:            case PT_LAMP:
1071            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1072                   prop->chartype == ucp_Lt;
1073            break;            break;
1074    
1075            case PT_GC:            case PT_GC:
1076            OK = category == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1077            break;            break;
1078    
1079            case PT_PC:            case PT_PC:
1080            OK = chartype == code[2];            OK = prop->chartype == code[2];
1081            break;            break;
1082    
1083            case PT_SC:            case PT_SC:
1084            OK = script == code[2];            OK = prop->script == code[2];
1085              break;
1086    
1087              /* These are specials for combination cases. */
1088    
1089              case PT_ALNUM:
1090              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1091                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1092              break;
1093    
1094              case PT_SPACE:    /* Perl space */
1095              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1096                   c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1097              break;
1098    
1099              case PT_PXSPACE:  /* POSIX space */
1100              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1101                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1102                   c == CHAR_FF || c == CHAR_CR;
1103              break;
1104    
1105              case PT_WORD:
1106              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1107                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1108                   c == CHAR_UNDERSCORE;
1109            break;            break;
1110    
1111            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 838  for (;;) Line 1125  for (;;)
1125  /* ========================================================================== */  /* ========================================================================== */
1126        /* These opcodes likewise inspect the subject character, but have an        /* These opcodes likewise inspect the subject character, but have an
1127        argument that is not a data character. It is one of these opcodes:        argument that is not a data character. It is one of these opcodes:
1128        OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,        OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
1129        OP_NOT_WORDCHAR. The value is loaded into d. */        OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
1130    
1131        case OP_TYPEPLUS:        case OP_TYPEPLUS:
1132        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
# Line 848  for (;;) Line 1135  for (;;)
1135        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1136        if (clen > 0)        if (clen > 0)
1137          {          {
1138          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1139                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1140                NLBLOCK->nltype == NLTYPE_FIXED &&
1141                NLBLOCK->nllen == 2 &&
1142                c == NLBLOCK->nl[0])
1143              {
1144              could_continue = partial_newline = TRUE;
1145              }
1146            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1147              (c < 256 &&              (c < 256 &&
1148                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
1149                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1150            {            {
1151            if (count > 0 && codevalue == OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_TYPEPOSPLUS)
# Line 874  for (;;) Line 1166  for (;;)
1166        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1167        if (clen > 0)        if (clen > 0)
1168          {          {
1169          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1170                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1171                NLBLOCK->nltype == NLTYPE_FIXED &&
1172                NLBLOCK->nllen == 2 &&
1173                c == NLBLOCK->nl[0])
1174              {
1175              could_continue = partial_newline = TRUE;
1176              }
1177            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1178              (c < 256 &&              (c < 256 &&
1179                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
1180                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1181            {            {
1182            if (codevalue == OP_TYPEPOSQUERY)            if (codevalue == OP_TYPEPOSQUERY)
# Line 899  for (;;) Line 1196  for (;;)
1196        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1197        if (clen > 0)        if (clen > 0)
1198          {          {
1199          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1200                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1201                NLBLOCK->nltype == NLTYPE_FIXED &&
1202                NLBLOCK->nllen == 2 &&
1203                c == NLBLOCK->nl[0])
1204              {
1205              could_continue = partial_newline = TRUE;
1206              }
1207            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1208              (c < 256 &&              (c < 256 &&
1209                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
1210                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1211            {            {
1212            if (codevalue == OP_TYPEPOSSTAR)            if (codevalue == OP_TYPEPOSSTAR)
# Line 922  for (;;) Line 1224  for (;;)
1224        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1225        if (clen > 0)        if (clen > 0)
1226          {          {
1227          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1228                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1229                NLBLOCK->nltype == NLTYPE_FIXED &&
1230                NLBLOCK->nllen == 2 &&
1231                c == NLBLOCK->nl[0])
1232              {
1233              could_continue = partial_newline = TRUE;
1234              }
1235            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1236              (c < 256 &&              (c < 256 &&
1237                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
1238                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1239            {            {
1240            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1241              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1242            else            else
1243              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1244            }            }
# Line 942  for (;;) Line 1249  for (;;)
1249        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1250        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1251        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1252        ADD_ACTIVE(state_offset + 4, 0);        ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1253        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1254        if (clen > 0)        if (clen > 0)
1255          {          {
1256          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1257                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1258                NLBLOCK->nltype == NLTYPE_FIXED &&
1259                NLBLOCK->nllen == 2 &&
1260                c == NLBLOCK->nl[0])
1261              {
1262              could_continue = partial_newline = TRUE;
1263              }
1264            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1265              (c < 256 &&              (c < 256 &&
1266                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
1267                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1268            {            {
1269            if (codevalue == OP_TYPEPOSUPTO)            if (codevalue == OP_TYPEPOSUPTO)
# Line 960  for (;;) Line 1272  for (;;)
1272              next_active_state--;              next_active_state--;
1273              }              }
1274            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1275              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1276            else            else
1277              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1278            }            }
# Line 982  for (;;) Line 1294  for (;;)
1294        if (clen > 0)        if (clen > 0)
1295          {          {
1296          BOOL OK;          BOOL OK;
1297          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1298          switch(code[2])          switch(code[2])
1299            {            {
1300            case PT_ANY:            case PT_ANY:
# Line 990  for (;;) Line 1302  for (;;)
1302            break;            break;
1303    
1304            case PT_LAMP:            case PT_LAMP:
1305            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1306                prop->chartype == ucp_Lt;
1307            break;            break;
1308    
1309            case PT_GC:            case PT_GC:
1310            OK = category == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1311            break;            break;
1312    
1313            case PT_PC:            case PT_PC:
1314            OK = chartype == code[3];            OK = prop->chartype == code[3];
1315            break;            break;
1316    
1317            case PT_SC:            case PT_SC:
1318            OK = script == code[3];            OK = prop->script == code[3];
1319              break;
1320    
1321              /* These are specials for combination cases. */
1322    
1323              case PT_ALNUM:
1324              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1325                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1326              break;
1327    
1328              case PT_SPACE:    /* Perl space */
1329              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1330                   c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1331              break;
1332    
1333              case PT_PXSPACE:  /* POSIX space */
1334              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1335                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1336                   c == CHAR_FF || c == CHAR_CR;
1337              break;
1338    
1339              case PT_WORD:
1340              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1341                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1342                   c == CHAR_UNDERSCORE;
1343            break;            break;
1344    
1345            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1031  for (;;) Line 1368  for (;;)
1368        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1369        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1370        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1371        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0)
1372          {          {
1373          const uschar *nptr = ptr + clen;          int lgb, rgb;
1374            const pcre_uchar *nptr = ptr + clen;
1375          int ncount = 0;          int ncount = 0;
1376          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1377            {            {
1378            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1379            next_active_state--;            next_active_state--;
1380            }            }
1381            lgb = UCD_GRAPHBREAK(c);
1382          while (nptr < end_subject)          while (nptr < end_subject)
1383            {            {
1384            int nd;            dlen = 1;
1385            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1386            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1387            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1388            ncount++;            ncount++;
1389            nptr += ndlen;            lgb = rgb;
1390              nptr += dlen;
1391            }            }
1392          count++;          count++;
1393          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
# Line 1066  for (;;) Line 1406  for (;;)
1406          int ncount = 0;          int ncount = 0;
1407          switch (c)          switch (c)
1408            {            {
1409            case 0x000d:            case CHAR_VT:
1410            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            case CHAR_FF:
1411            /* Fall through */            case CHAR_NEL:
1412            case 0x000a:  #ifndef EBCDIC
           case 0x000b:  
           case 0x000c:  
           case 0x0085:  
1413            case 0x2028:            case 0x2028:
1414            case 0x2029:            case 0x2029:
1415    #endif  /* Not EBCDIC */
1416              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1417              goto ANYNL01;
1418    
1419              case CHAR_CR:
1420              if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
1421              /* Fall through */
1422    
1423              ANYNL01:
1424              case CHAR_LF:
1425            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1426              {              {
1427              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1083  for (;;) Line 1430  for (;;)
1430            count++;            count++;
1431            ADD_NEW_DATA(-state_offset, count, ncount);            ADD_NEW_DATA(-state_offset, count, ncount);
1432            break;            break;
1433    
1434              default:
1435              break;
1436              }
1437            }
1438          break;
1439    
1440          /*-----------------------------------------------------------------*/
1441          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1442          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1443          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1444          count = current_state->count;  /* Already matched */
1445          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1446          if (clen > 0)
1447            {
1448            BOOL OK;
1449            switch (c)
1450              {
1451              VSPACE_CASES:
1452              OK = TRUE;
1453              break;
1454    
1455              default:
1456              OK = FALSE;
1457              break;
1458              }
1459    
1460            if (OK == (d == OP_VSPACE))
1461              {
1462              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1463                {
1464                active_count--;           /* Remove non-match possibility */
1465                next_active_state--;
1466                }
1467              count++;
1468              ADD_NEW_DATA(-state_offset, count, 0);
1469              }
1470            }
1471          break;
1472    
1473          /*-----------------------------------------------------------------*/
1474          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1475          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1476          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1477          count = current_state->count;  /* Already matched */
1478          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1479          if (clen > 0)
1480            {
1481            BOOL OK;
1482            switch (c)
1483              {
1484              HSPACE_CASES:
1485              OK = TRUE;
1486              break;
1487    
1488            default:            default:
1489              OK = FALSE;
1490            break;            break;
1491            }            }
1492    
1493            if (OK == (d == OP_HSPACE))
1494              {
1495              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1496                {
1497                active_count--;           /* Remove non-match possibility */
1498                next_active_state--;
1499                }
1500              count++;
1501              ADD_NEW_DATA(-state_offset, count, 0);
1502              }
1503          }          }
1504        break;        break;
1505    
# Line 1108  for (;;) Line 1522  for (;;)
1522        if (clen > 0)        if (clen > 0)
1523          {          {
1524          BOOL OK;          BOOL OK;
1525          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1526          switch(code[2])          switch(code[2])
1527            {            {
1528            case PT_ANY:            case PT_ANY:
# Line 1116  for (;;) Line 1530  for (;;)
1530            break;            break;
1531    
1532            case PT_LAMP:            case PT_LAMP:
1533            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1534                prop->chartype == ucp_Lt;
1535            break;            break;
1536    
1537            case PT_GC:            case PT_GC:
1538            OK = category == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1539            break;            break;
1540    
1541            case PT_PC:            case PT_PC:
1542            OK = chartype == code[3];            OK = prop->chartype == code[3];
1543            break;            break;
1544    
1545            case PT_SC:            case PT_SC:
1546            OK = script == code[3];            OK = prop->script == code[3];
1547              break;
1548    
1549              /* These are specials for combination cases. */
1550    
1551              case PT_ALNUM:
1552              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1553                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1554              break;
1555    
1556              case PT_SPACE:    /* Perl space */
1557              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1558                   c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1559              break;
1560    
1561              case PT_PXSPACE:  /* POSIX space */
1562              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1563                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1564                   c == CHAR_FF || c == CHAR_CR;
1565              break;
1566    
1567              case PT_WORD:
1568              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1569                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1570                   c == CHAR_UNDERSCORE;
1571            break;            break;
1572    
1573            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1166  for (;;) Line 1605  for (;;)
1605        QS2:        QS2:
1606    
1607        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1608        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0)
1609          {          {
1610          const uschar *nptr = ptr + clen;          int lgb, rgb;
1611            const pcre_uchar *nptr = ptr + clen;
1612          int ncount = 0;          int ncount = 0;
1613          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1614              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1176  for (;;) Line 1616  for (;;)
1616            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1617            next_active_state--;            next_active_state--;
1618            }            }
1619            lgb = UCD_GRAPHBREAK(c);
1620          while (nptr < end_subject)          while (nptr < end_subject)
1621            {            {
1622            int nd;            dlen = 1;
1623            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1624            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1625            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1626            ncount++;            ncount++;
1627            nptr += ndlen;            lgb = rgb;
1628              nptr += dlen;
1629            }            }
1630          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1631          }          }
# Line 1209  for (;;) Line 1651  for (;;)
1651          int ncount = 0;          int ncount = 0;
1652          switch (c)          switch (c)
1653            {            {
1654            case 0x000d:            case CHAR_VT:
1655            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            case CHAR_FF:
1656            /* Fall through */            case CHAR_NEL:
1657            case 0x000a:  #ifndef EBCDIC
           case 0x000b:  
           case 0x000c:  
           case 0x0085:  
1658            case 0x2028:            case 0x2028:
1659            case 0x2029:            case 0x2029:
1660    #endif  /* Not EBCDIC */
1661              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1662              goto ANYNL02;
1663    
1664              case CHAR_CR:
1665              if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
1666              /* Fall through */
1667    
1668              ANYNL02:
1669              case CHAR_LF:
1670            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1671                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1672              {              {
# Line 1226  for (;;) Line 1675  for (;;)
1675              }              }
1676            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1677            break;            break;
1678    
1679            default:            default:
1680            break;            break;
1681            }            }
# Line 1233  for (;;) Line 1683  for (;;)
1683        break;        break;
1684    
1685        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1686  #ifdef SUPPORT_UCP        case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1687        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1688        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1689        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        count = 2;
1690        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        goto QS4;
1691        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)  
1692          { ADD_ACTIVE(state_offset + 6, 0); }        case OP_VSPACE_EXTRA + OP_TYPESTAR:
1693        count = current_state->count;  /* Number already matched */        case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1694          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1695          count = 0;
1696    
1697          QS4:
1698          ADD_ACTIVE(state_offset + 2, 0);
1699        if (clen > 0)        if (clen > 0)
1700          {          {
1701          BOOL OK;          BOOL OK;
1702          int category = _pcre_ucp_findprop(c, &chartype, &script);          switch (c)
         switch(code[4])  
1703            {            {
1704            case PT_ANY:            VSPACE_CASES:
1705            OK = TRUE;            OK = TRUE;
1706            break;            break;
1707    
1708            case PT_LAMP:            default:
1709            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = FALSE;
1710            break;            break;
1711              }
1712            if (OK == (d == OP_VSPACE))
1713              {
1714              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1715                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1716                {
1717                active_count--;           /* Remove non-match possibility */
1718                next_active_state--;
1719                }
1720              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1721              }
1722            }
1723          break;
1724    
1725            case PT_GC:        /*-----------------------------------------------------------------*/
1726            OK = category == code[5];        case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1727            break;        case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1728          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1729          count = 2;
1730          goto QS5;
1731    
1732            case PT_PC:        case OP_HSPACE_EXTRA + OP_TYPESTAR:
1733            OK = chartype == code[5];        case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1734            break;        case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1735          count = 0;
1736    
1737            case PT_SC:        QS5:
1738            OK = script == code[5];        ADD_ACTIVE(state_offset + 2, 0);
1739          if (clen > 0)
1740            {
1741            BOOL OK;
1742            switch (c)
1743              {
1744              HSPACE_CASES:
1745              OK = TRUE;
1746            break;            break;
1747    
           /* Should never occur, but keep compilers from grumbling. */  
   
1748            default:            default:
1749            OK = codevalue != OP_PROP;            OK = FALSE;
1750            break;            break;
1751            }            }
1752    
1753          if (OK == (d == OP_PROP))          if (OK == (d == OP_HSPACE))
1754            {            {
1755            if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1756                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1757              {              {
1758              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1759              next_active_state--;              next_active_state--;
1760              }              }
1761            if (++count >= GET2(code, 1))            ADD_NEW_DATA(-(state_offset + count), 0, 0);
             { ADD_NEW(state_offset + 6, 0); }  
           else  
             { ADD_NEW(state_offset, count); }  
1762            }            }
1763          }          }
1764        break;        break;
1765    
1766        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1767        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:  #ifdef SUPPORT_UCP
1768        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1769        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1770          case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1771          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1772          if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1773            { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1774          count = current_state->count;  /* Number already matched */
1775          if (clen > 0)
1776            {
1777            BOOL OK;
1778            const ucd_record * prop = GET_UCD(c);
1779            switch(code[1 + IMM2_SIZE + 1])
1780              {
1781              case PT_ANY:
1782              OK = TRUE;
1783              break;
1784    
1785              case PT_LAMP:
1786              OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1787                prop->chartype == ucp_Lt;
1788              break;
1789    
1790              case PT_GC:
1791              OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1792              break;
1793    
1794              case PT_PC:
1795              OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1796              break;
1797    
1798              case PT_SC:
1799              OK = prop->script == code[1 + IMM2_SIZE + 2];
1800              break;
1801    
1802              /* These are specials for combination cases. */
1803    
1804              case PT_ALNUM:
1805              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1806                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1807              break;
1808    
1809              case PT_SPACE:    /* Perl space */
1810              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1811                   c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1812              break;
1813    
1814              case PT_PXSPACE:  /* POSIX space */
1815              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1816                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1817                   c == CHAR_FF || c == CHAR_CR;
1818              break;
1819    
1820              case PT_WORD:
1821              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1822                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1823                   c == CHAR_UNDERSCORE;
1824              break;
1825    
1826              /* Should never occur, but keep compilers from grumbling. */
1827    
1828              default:
1829              OK = codevalue != OP_PROP;
1830              break;
1831              }
1832    
1833            if (OK == (d == OP_PROP))
1834              {
1835              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1836                {
1837                active_count--;           /* Remove non-match possibility */
1838                next_active_state--;
1839                }
1840              if (++count >= GET2(code, 1))
1841                { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1842              else
1843                { ADD_NEW(state_offset, count); }
1844              }
1845            }
1846          break;
1847    
1848          /*-----------------------------------------------------------------*/
1849          case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1850          case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1851          case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1852        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1853        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1854          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1855        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1856        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0)
1857          {          {
1858          const uschar *nptr = ptr + clen;          int lgb, rgb;
1859            const pcre_uchar *nptr = ptr + clen;
1860          int ncount = 0;          int ncount = 0;
1861          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1862            {            {
1863            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1864            next_active_state--;            next_active_state--;
1865            }            }
1866            lgb = UCD_GRAPHBREAK(c);
1867          while (nptr < end_subject)          while (nptr < end_subject)
1868            {            {
1869            int nd;            dlen = 1;
1870            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1871            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1872            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1873            ncount++;            ncount++;
1874            nptr += ndlen;            lgb = rgb;
1875              nptr += dlen;
1876            }            }
1877            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1878                reset_could_continue = TRUE;
1879          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1880            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1881          else          else
1882            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1883          }          }
# Line 1329  for (;;) Line 1890  for (;;)
1890        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1891        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1892        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1893          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1894        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1895        if (clen > 0)        if (clen > 0)
1896          {          {
1897          int ncount = 0;          int ncount = 0;
1898          switch (c)          switch (c)
1899            {            {
1900            case 0x000d:            case CHAR_VT:
1901            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            case CHAR_FF:
1902            /* Fall through */            case CHAR_NEL:
1903            case 0x000a:  #ifndef EBCDIC
           case 0x000b:  
           case 0x000c:  
           case 0x0085:  
1904            case 0x2028:            case 0x2028:
1905            case 0x2029:            case 0x2029:
1906    #endif  /* Not EBCDIC */
1907              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1908              goto ANYNL03;
1909    
1910              case CHAR_CR:
1911              if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
1912              /* Fall through */
1913    
1914              ANYNL03:
1915              case CHAR_LF:
1916            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1917              {              {
1918              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1919              next_active_state--;              next_active_state--;
1920              }              }
1921            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1922              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1923            else            else
1924              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1925            break;            break;
1926    
1927            default:            default:
1928            break;            break;
1929            }            }
1930          }          }
1931        break;        break;
1932    
1933          /*-----------------------------------------------------------------*/
1934          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1935          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1936          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1937          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1938          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1939            { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1940          count = current_state->count;  /* Number already matched */
1941          if (clen > 0)
1942            {
1943            BOOL OK;
1944            switch (c)
1945              {
1946              VSPACE_CASES:
1947              OK = TRUE;
1948              break;
1949    
1950              default:
1951              OK = FALSE;
1952              }
1953    
1954            if (OK == (d == OP_VSPACE))
1955              {
1956              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1957                {
1958                active_count--;           /* Remove non-match possibility */
1959                next_active_state--;
1960                }
1961              if (++count >= GET2(code, 1))
1962                { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1963              else
1964                { ADD_NEW_DATA(-state_offset, count, 0); }
1965              }
1966            }
1967          break;
1968    
1969          /*-----------------------------------------------------------------*/
1970          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1971          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1972          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1973          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1974          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1975            { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1976          count = current_state->count;  /* Number already matched */
1977          if (clen > 0)
1978            {
1979            BOOL OK;
1980            switch (c)
1981              {
1982              HSPACE_CASES:
1983              OK = TRUE;
1984              break;
1985    
1986              default:
1987              OK = FALSE;
1988              break;
1989              }
1990    
1991            if (OK == (d == OP_HSPACE))
1992              {
1993              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1994                {
1995                active_count--;           /* Remove non-match possibility */
1996                next_active_state--;
1997                }
1998              if (++count >= GET2(code, 1))
1999                { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2000              else
2001                { ADD_NEW_DATA(-state_offset, count, 0); }
2002              }
2003            }
2004          break;
2005    
2006  /* ========================================================================== */  /* ========================================================================== */
2007        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
2008        to the current subject character; it is loaded into d. We still get        to the current subject character; it is loaded into d. We still get
# Line 1373  for (;;) Line 2015  for (;;)
2015        break;        break;
2016    
2017        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2018        case OP_CHARNC:        case OP_CHARI:
2019        if (clen == 0) break;        if (clen == 0) break;
2020    
2021  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2022        if (utf8)        if (utf)
2023          {          {
2024          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2025            {            {
2026            unsigned int othercase;            unsigned int othercase;
2027            if (c < 128) othercase = fcc[c]; else            if (c < 128)
2028                othercase = fcc[c];
2029            /* If we have Unicode property support, we can use it to test the            else
2030            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2031                other case of the character. */
2032  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2033            othercase = _pcre_ucp_othercase(c);              othercase = UCD_OTHERCASE(c);
2034  #else  #else
2035            othercase = NOTACHAR;              othercase = NOTACHAR;
2036  #endif  #endif
2037    
2038            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2039            }            }
2040          }          }
2041        else        else
2042  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2043          /* Not UTF mode */
       /* Non-UTF-8 mode */  
2044          {          {
2045          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2046              { ADD_NEW(state_offset + 2, 0); }
2047          }          }
2048        break;        break;
2049    
# Line 1413  for (;;) Line 2055  for (;;)
2055        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2056    
2057        case OP_EXTUNI:        case OP_EXTUNI:
2058        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0)
2059          {          {
2060          const uschar *nptr = ptr + clen;          int lgb, rgb;
2061            const pcre_uchar *nptr = ptr + clen;
2062          int ncount = 0;          int ncount = 0;
2063            lgb = UCD_GRAPHBREAK(c);
2064          while (nptr < end_subject)          while (nptr < end_subject)
2065            {            {
2066            int nclen = 1;            dlen = 1;
2067            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2068            if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2069              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2070            ncount++;            ncount++;
2071            nptr += nclen;            lgb = rgb;
2072              nptr += dlen;
2073            }            }
2074            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2075                reset_could_continue = TRUE;
2076          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2077          }          }
2078        break;        break;
# Line 1438  for (;;) Line 2086  for (;;)
2086        case OP_ANYNL:        case OP_ANYNL:
2087        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2088          {          {
2089          case 0x000a:          case CHAR_VT:
2090          case 0x000b:          case CHAR_FF:
2091          case 0x000c:          case CHAR_NEL:
2092          case 0x0085:  #ifndef EBCDIC
2093          case 0x2028:          case 0x2028:
2094          case 0x2029:          case 0x2029:
2095    #endif  /* Not EBCDIC */
2096            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2097    
2098            case CHAR_LF:
2099          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2100          break;          break;
2101          case 0x000d:  
2102          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          case CHAR_CR:
2103            if (ptr + 1 >= end_subject)
2104              {
2105              ADD_NEW(state_offset + 1, 0);
2106              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2107                reset_could_continue = TRUE;
2108              }
2109            else if (ptr[1] == CHAR_LF)
2110            {            {
2111            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2112            }            }
# Line 1460  for (;;) Line 2119  for (;;)
2119        break;        break;
2120    
2121        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2122        /* Match a negated single character. This is only used for one-byte        case OP_NOT_VSPACE:
2123        characters, that is, we know that d < 256. The character we are        if (clen > 0) switch(c)
2124        checking (c) can be multibyte. */          {
2125            VSPACE_CASES:
2126            break;
2127    
2128            default:
2129            ADD_NEW(state_offset + 1, 0);
2130            break;
2131            }
2132          break;
2133    
2134          /*-----------------------------------------------------------------*/
2135          case OP_VSPACE:
2136          if (clen > 0) switch(c)
2137            {
2138            VSPACE_CASES:
2139            ADD_NEW(state_offset + 1, 0);
2140            break;
2141    
2142            default:
2143            break;
2144            }
2145          break;
2146    
2147          /*-----------------------------------------------------------------*/
2148          case OP_NOT_HSPACE:
2149          if (clen > 0) switch(c)
2150            {
2151            HSPACE_CASES:
2152            break;
2153    
2154            default:
2155            ADD_NEW(state_offset + 1, 0);
2156            break;
2157            }
2158          break;
2159    
2160          /*-----------------------------------------------------------------*/
2161          case OP_HSPACE:
2162          if (clen > 0) switch(c)
2163            {
2164            HSPACE_CASES:
2165            ADD_NEW(state_offset + 1, 0);
2166            break;
2167    
2168            default:
2169            break;
2170            }
2171          break;
2172    
2173          /*-----------------------------------------------------------------*/
2174          /* Match a negated single character casefully. */
2175    
2176        case OP_NOT:        case OP_NOT:
2177          if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2178          break;
2179    
2180          /*-----------------------------------------------------------------*/
2181          /* Match a negated single character caselessly. */
2182    
2183          case OP_NOTI:
2184        if (clen > 0)        if (clen > 0)
2185          {          {
2186          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd;
2187          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }  #ifdef SUPPORT_UTF
2188            if (utf && d >= 128)
2189              {
2190    #ifdef SUPPORT_UCP
2191              otherd = UCD_OTHERCASE(d);
2192    #endif  /* SUPPORT_UCP */
2193              }
2194            else
2195    #endif  /* SUPPORT_UTF */
2196            otherd = TABLE_GET(d, fcc, d);
2197            if (c != d && c != otherd)
2198              { ADD_NEW(state_offset + dlen + 1, 0); }
2199          }          }
2200        break;        break;
2201    
2202        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2203          case OP_PLUSI:
2204          case OP_MINPLUSI:
2205          case OP_POSPLUSI:
2206          case OP_NOTPLUSI:
2207          case OP_NOTMINPLUSI:
2208          case OP_NOTPOSPLUSI:
2209          caseless = TRUE;
2210          codevalue -= OP_STARI - OP_STAR;
2211    
2212          /* Fall through */
2213        case OP_PLUS:        case OP_PLUS:
2214        case OP_MINPLUS:        case OP_MINPLUS:
2215        case OP_POSPLUS:        case OP_POSPLUS:
# Line 1484  for (;;) Line 2221  for (;;)
2221        if (clen > 0)        if (clen > 0)
2222          {          {
2223          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2224          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2225            {            {
2226  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2227            if (utf8 && d >= 128)            if (utf && d >= 128)
2228              {              {
2229  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2230              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2231  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2232              }              }
2233            else            else
2234  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2235            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2236            }            }
2237          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2238            {            {
# Line 1512  for (;;) Line 2249  for (;;)
2249        break;        break;
2250    
2251        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2252          case OP_QUERYI:
2253          case OP_MINQUERYI:
2254          case OP_POSQUERYI:
2255          case OP_NOTQUERYI:
2256          case OP_NOTMINQUERYI:
2257          case OP_NOTPOSQUERYI:
2258          caseless = TRUE;
2259          codevalue -= OP_STARI - OP_STAR;
2260          /* Fall through */
2261        case OP_QUERY:        case OP_QUERY:
2262        case OP_MINQUERY:        case OP_MINQUERY:
2263        case OP_POSQUERY:        case OP_POSQUERY:
# Line 1522  for (;;) Line 2268  for (;;)
2268        if (clen > 0)        if (clen > 0)
2269          {          {
2270          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2271          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2272            {            {
2273  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2274            if (utf8 && d >= 128)            if (utf && d >= 128)
2275              {              {
2276  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2277              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2278  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2279              }              }
2280            else            else
2281  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2282            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2283            }            }
2284          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2285            {            {
# Line 1548  for (;;) Line 2294  for (;;)
2294        break;        break;
2295    
2296        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2297          case OP_STARI:
2298          case OP_MINSTARI:
2299          case OP_POSSTARI:
2300          case OP_NOTSTARI:
2301          case OP_NOTMINSTARI:
2302          case OP_NOTPOSSTARI:
2303          caseless = TRUE;
2304          codevalue -= OP_STARI - OP_STAR;
2305          /* Fall through */
2306        case OP_STAR:        case OP_STAR:
2307        case OP_MINSTAR:        case OP_MINSTAR:
2308        case OP_POSSTAR:        case OP_POSSTAR:
# Line 1558  for (;;) Line 2313  for (;;)
2313        if (clen > 0)        if (clen > 0)
2314          {          {
2315          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2316          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2317            {            {
2318  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2319            if (utf8 && d >= 128)            if (utf && d >= 128)
2320              {              {
2321  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2322              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2323  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2324              }              }
2325            else            else
2326  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2327            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2328            }            }
2329          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2330            {            {
# Line 1584  for (;;) Line 2339  for (;;)
2339        break;        break;
2340    
2341        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2342          case OP_EXACTI:
2343          case OP_NOTEXACTI:
2344          caseless = TRUE;
2345          codevalue -= OP_STARI - OP_STAR;
2346          /* Fall through */
2347        case OP_EXACT:        case OP_EXACT:
2348        case OP_NOTEXACT:        case OP_NOTEXACT:
2349        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2350        if (clen > 0)        if (clen > 0)
2351          {          {
2352          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2353          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2354            {            {
2355  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2356            if (utf8 && d >= 128)            if (utf && d >= 128)
2357              {              {
2358  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2359              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2360  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2361              }              }
2362            else            else
2363  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2364            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2365            }            }
2366          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2367            {            {
2368            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2369              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2370            else            else
2371              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2372            }            }
# Line 1614  for (;;) Line 2374  for (;;)
2374        break;        break;
2375    
2376        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2377          case OP_UPTOI:
2378          case OP_MINUPTOI:
2379          case OP_POSUPTOI:
2380          case OP_NOTUPTOI:
2381          case OP_NOTMINUPTOI:
2382          case OP_NOTPOSUPTOI:
2383          caseless = TRUE;
2384          codevalue -= OP_STARI - OP_STAR;
2385          /* Fall through */
2386        case OP_UPTO:        case OP_UPTO:
2387        case OP_MINUPTO:        case OP_MINUPTO:
2388        case OP_POSUPTO:        case OP_POSUPTO:
2389        case OP_NOTUPTO:        case OP_NOTUPTO:
2390        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2391        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2392        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2393        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2394        if (clen > 0)        if (clen > 0)
2395          {          {
2396          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2397          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2398            {            {
2399  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2400            if (utf8 && d >= 128)            if (utf && d >= 128)
2401              {              {
2402  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2403              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2404  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2405              }              }
2406            else            else
2407  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2408            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2409            }            }
2410          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2411            {            {
# Line 1646  for (;;) Line 2415  for (;;)
2415              next_active_state--;              next_active_state--;
2416              }              }
2417            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2418              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2419            else            else
2420              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2421            }            }
# Line 1663  for (;;) Line 2432  for (;;)
2432          {          {
2433          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2434          int next_state_offset;          int next_state_offset;
2435          const uschar *ecode;          const pcre_uchar *ecode;
2436    
2437          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2438          can set isinclass from it. */          can set isinclass from it. */
2439    
2440          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2441            {            {
2442            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2443            if (clen > 0)            if (clen > 0)
2444              {              {
2445              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2446                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2447              }              }
2448            }            }
2449    
# Line 1685  for (;;) Line 2454  for (;;)
2454          else          else
2455           {           {
2456           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2457           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2458           }           }
2459    
2460          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
2461          points to the byte after the end of the class. If there is a          points to the byte after the end of the class. If there is a
2462          quantifier, this is where it will be. */          quantifier, this is where it will be. */
2463    
2464          next_state_offset = ecode - start_code;          next_state_offset = (int)(ecode - start_code);
2465    
2466          switch (*ecode)          switch (*ecode)
2467            {            {
# Line 1719  for (;;) Line 2488  for (;;)
2488            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2489            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2490            if (count >= GET2(ecode, 1))            if (count >= GET2(ecode, 1))
2491              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2492            if (isinclass)            if (isinclass)
2493              {              {
2494              int max = GET2(ecode, 3);              int max = GET2(ecode, 1 + IMM2_SIZE);
2495              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2496                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2497              else              else
2498                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
2499              }              }
# Line 1739  for (;;) Line 2508  for (;;)
2508    
2509  /* ========================================================================== */  /* ========================================================================== */
2510        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2511        to use recursion in order to handle them. */        to use recursion in order to handle them. The "always failing" assertion
2512          (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2513          though the other "backtracking verbs" are not supported. */
2514    
2515          case OP_FAIL:
2516          forced_fail++;    /* Count FAILs for multiple states */
2517          break;
2518    
2519        case OP_ASSERT:        case OP_ASSERT:
2520        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 1749  for (;;) Line 2524  for (;;)
2524          int rc;          int rc;
2525          int local_offsets[2];          int local_offsets[2];
2526          int local_workspace[1000];          int local_workspace[1000];
2527          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2528    
2529          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2530    
# Line 1757  for (;;) Line 2532  for (;;)
2532            md,                                   /* static match data */            md,                                   /* static match data */
2533            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2534            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2535            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2536            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2537            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2538            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2539            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2540            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
           rlevel,                               /* function recursion level */  
           recursing);                           /* pass on regex recursion */  
2541    
2542            if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2543          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2544              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2545          }          }
2546        break;        break;
2547    
# Line 1777  for (;;) Line 2551  for (;;)
2551          {          {
2552          int local_offsets[1000];          int local_offsets[1000];
2553          int local_workspace[1000];          int local_workspace[1000];
2554          int condcode = code[LINK_SIZE+1];          int codelink = GET(code, 1);
2555            int condcode;
2556    
2557            /* Because of the way auto-callout works during compile, a callout item
2558            is inserted between OP_COND and an assertion condition. This does not
2559            happen for the other conditions. */
2560    
2561            if (code[LINK_SIZE+1] == OP_CALLOUT)
2562              {
2563              rrc = 0;
2564              if (PUBL(callout) != NULL)
2565                {
2566                PUBL(callout_block) cb;
2567                cb.version          = 1;   /* Version 1 of the callout block */
2568                cb.callout_number   = code[LINK_SIZE+2];
2569                cb.offset_vector    = offsets;
2570    #ifdef COMPILE_PCRE8
2571                cb.subject          = (PCRE_SPTR)start_subject;
2572    #else
2573                cb.subject          = (PCRE_SPTR16)start_subject;
2574    #endif
2575                cb.subject_length   = (int)(end_subject - start_subject);
2576                cb.start_match      = (int)(current_subject - start_subject);
2577                cb.current_position = (int)(ptr - start_subject);
2578                cb.pattern_position = GET(code, LINK_SIZE + 3);
2579                cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2580                cb.capture_top      = 1;
2581                cb.capture_last     = -1;
2582                cb.callout_data     = md->callout_data;
2583                cb.mark             = NULL;   /* No (*MARK) support */
2584                if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2585                }
2586              if (rrc > 0) break;                      /* Fail this thread */
2587              code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2588              }
2589    
2590            condcode = code[LINK_SIZE+1];
2591    
2592          /* Back reference conditions are not supported */          /* Back reference conditions are not supported */
2593    
2594          if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;          if (condcode == OP_CREF || condcode == OP_NCREF)
2595              return PCRE_ERROR_DFA_UCOND;
2596    
2597          /* The DEFINE condition is always false */          /* The DEFINE condition is always false */
2598    
2599          if (condcode == OP_DEF)          if (condcode == OP_DEF)
2600            {            { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
           ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);  
           }  
2601    
2602          /* The only supported version of OP_RREF is for the value RREF_ANY,          /* The only supported version of OP_RREF is for the value RREF_ANY,
2603          which means "test if in any recursion". We can't test for specifically          which means "test if in any recursion". We can't test for specifically
2604          recursed groups. */          recursed groups. */
2605    
2606          else if (condcode == OP_RREF)          else if (condcode == OP_RREF || condcode == OP_NRREF)
2607            {            {
2608            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2609            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2610            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (md->recursive != NULL)
2611              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2612              else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2613            }            }
2614    
2615          /* Otherwise, the condition is an assertion */          /* Otherwise, the condition is an assertion */
# Line 1807  for (;;) Line 2617  for (;;)
2617          else          else
2618            {            {
2619            int rc;            int rc;
2620            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2621            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2622    
2623            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2624    
# Line 1816  for (;;) Line 2626  for (;;)
2626              md,                                   /* fixed match data */              md,                                   /* fixed match data */
2627              asscode,                              /* this subexpression's code */              asscode,                              /* this subexpression's code */
2628              ptr,                                  /* where we currently are */              ptr,                                  /* where we currently are */
2629              ptr - start_subject,                  /* start offset */              (int)(ptr - start_subject),           /* start offset */
2630              local_offsets,                        /* offset vector */              local_offsets,                        /* offset vector */
2631              sizeof(local_offsets)/sizeof(int),    /* size of same */              sizeof(local_offsets)/sizeof(int),    /* size of same */
2632              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2633              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2634              ims,                                  /* the current ims flags */              rlevel);                              /* function recursion level */
             rlevel,                               /* function recursion level */  
             recursing);                           /* pass on regex recursion */  
2635    
2636              if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2637            if ((rc >= 0) ==            if ((rc >= 0) ==
2638                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2639              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2640            else            else
2641              { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2642            }            }
2643          }          }
2644        break;        break;
# Line 1837  for (;;) Line 2646  for (;;)
2646        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2647        case OP_RECURSE:        case OP_RECURSE:
2648          {          {
2649            dfa_recursion_info *ri;
2650          int local_offsets[1000];          int local_offsets[1000];
2651          int local_workspace[1000];          int local_workspace[1000];
2652            const pcre_uchar *callpat = start_code + GET(code, 1);
2653            int recno = (callpat == md->start_code)? 0 :
2654              GET2(callpat, 1 + LINK_SIZE);
2655          int rc;          int rc;
2656    
2657          DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2658            recursing + 1));  
2659            /* Check for repeating a recursion without advancing the subject
2660            pointer. This should catch convoluted mutual recursions. (Some simple
2661            cases are caught at compile time.) */
2662    
2663            for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2664              if (recno == ri->group_num && ptr == ri->subject_position)
2665                return PCRE_ERROR_RECURSELOOP;
2666    
2667            /* Remember this recursion and where we started it so as to
2668            catch infinite loops. */
2669    
2670            new_recursive.group_num = recno;
2671            new_recursive.subject_position = ptr;
2672            new_recursive.prevrec = md->recursive;
2673            md->recursive = &new_recursive;
2674    
2675          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2676            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2677            start_code + GET(code, 1),            /* this subexpression's code */            callpat,                              /* this subexpression's code */
2678            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2679            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2680            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2681            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2682            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2683            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2684            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
2685            rlevel,                               /* function recursion level */  
2686            recursing + 1);                       /* regex recurse level */          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2687    
2688          DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,          DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2689            recursing + 1, rc));            rc));
2690    
2691          /* Ran out of internal offsets */          /* Ran out of internal offsets */
2692    
# Line 1872  for (;;) Line 2700  for (;;)
2700            {            {
2701            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2702              {              {
             const uschar *p = start_subject + local_offsets[rc];  
             const uschar *pp = start_subject + local_offsets[rc+1];  
2703              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2704              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2705                if (utf)
2706                  {
2707                  const pcre_uchar *p = start_subject + local_offsets[rc];
2708                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2709                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2710                  }
2711    #endif
2712              if (charcount > 0)              if (charcount > 0)
2713                {                {
2714                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
# Line 1891  for (;;) Line 2724  for (;;)
2724        break;        break;
2725    
2726        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2727          case OP_BRAPOS:
2728          case OP_SBRAPOS:
2729          case OP_CBRAPOS:
2730          case OP_SCBRAPOS:
2731          case OP_BRAPOSZERO:
2732            {
2733            int charcount, matched_count;
2734            const pcre_uchar *local_ptr = ptr;
2735            BOOL allow_zero;
2736    
2737            if (codevalue == OP_BRAPOSZERO)
2738              {
2739              allow_zero = TRUE;
2740              codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2741              }
2742            else allow_zero = FALSE;
2743    
2744            /* Loop to match the subpattern as many times as possible as if it were
2745            a complete pattern. */
2746    
2747            for (matched_count = 0;; matched_count++)
2748              {
2749              int local_offsets[2];
2750              int local_workspace[1000];
2751    
2752              int rc = internal_dfa_exec(
2753                md,                                   /* fixed match data */
2754                code,                                 /* this subexpression's code */
2755                local_ptr,                            /* where we currently are */
2756                (int)(ptr - start_subject),           /* start offset */
2757                local_offsets,                        /* offset vector */
2758                sizeof(local_offsets)/sizeof(int),    /* size of same */
2759                local_workspace,                      /* workspace vector */
2760                sizeof(local_workspace)/sizeof(int),  /* size of same */
2761                rlevel);                              /* function recursion level */
2762    
2763              /* Failed to match */
2764    
2765              if (rc < 0)
2766                {
2767                if (rc != PCRE_ERROR_NOMATCH) return rc;
2768                break;
2769                }
2770    
2771              /* Matched: break the loop if zero characters matched. */
2772    
2773              charcount = local_offsets[1] - local_offsets[0];
2774              if (charcount == 0) break;
2775              local_ptr += charcount;    /* Advance temporary position ptr */
2776              }
2777    
2778            /* At this point we have matched the subpattern matched_count
2779            times, and local_ptr is pointing to the character after the end of the
2780            last match. */
2781    
2782            if (matched_count > 0 || allow_zero)
2783              {
2784              const pcre_uchar *end_subpattern = code;
2785              int next_state_offset;
2786    
2787              do { end_subpattern += GET(end_subpattern, 1); }
2788                while (*end_subpattern == OP_ALT);
2789              next_state_offset =
2790                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2791    
2792              /* Optimization: if there are no more active states, and there
2793              are no new states yet set up, then skip over the subject string
2794              right here, to save looping. Otherwise, set up the new state to swing
2795              into action when the end of the matched substring is reached. */
2796    
2797              if (i + 1 >= active_count && new_count == 0)
2798                {
2799                ptr = local_ptr;
2800                clen = 0;
2801                ADD_NEW(next_state_offset, 0);
2802                }
2803              else
2804                {
2805                const pcre_uchar *p = ptr;
2806                const pcre_uchar *pp = local_ptr;
2807                charcount = (int)(pp - p);
2808    #ifdef SUPPORT_UTF
2809                if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2810    #endif
2811                ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2812                }
2813              }
2814            }
2815          break;
2816    
2817          /*-----------------------------------------------------------------*/
2818        case OP_ONCE:        case OP_ONCE:
2819          case OP_ONCE_NC:
2820          {          {
2821          int local_offsets[2];          int local_offsets[2];
2822          int local_workspace[1000];          int local_workspace[1000];
# Line 1900  for (;;) Line 2825  for (;;)
2825            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2826            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2827            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2828            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2829            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2830            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2831            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2832            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2833            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
           rlevel,                               /* function recursion level */  
           recursing);                           /* pass on regex recursion */  
2834    
2835          if (rc >= 0)          if (rc >= 0)
2836            {            {
2837            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2838            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2839            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2840    
2841            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2842              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
2843            next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;            next_state_offset =
2844                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2845    
2846            /* If the end of this subpattern is KETRMAX or KETRMIN, we must            /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2847            arrange for the repeat state also to be added to the relevant list.            arrange for the repeat state also to be added to the relevant list.
# Line 1925  for (;;) Line 2849  for (;;)
2849    
2850            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2851                                   *end_subpattern == OP_KETRMIN)?                                   *end_subpattern == OP_KETRMIN)?
2852              end_subpattern - start_code - GET(end_subpattern, 1) : -1;              (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
2853    
2854            /* If we have matched an empty string, add the next state at the            /* If we have matched an empty string, add the next state at the
2855            current character pointer. This is important so that the duplicate            current character pointer. This is important so that the duplicate
# Line 1940  for (;;) Line 2864  for (;;)
2864            /* Optimization: if there are no more active states, and there            /* Optimization: if there are no more active states, and there
2865            are no new states yet set up, then skip over the subject string            are no new states yet set up, then skip over the subject string
2866            right here, to save looping. Otherwise, set up the new state to swing            right here, to save looping. Otherwise, set up the new state to swing
2867            into action when the end of the substring is reached. */            into action when the end of the matched substring is reached. */
2868    
2869            else if (i + 1 >= active_count && new_count == 0)            else if (i + 1 >= active_count && new_count == 0)
2870              {              {
# Line 1963  for (;;) Line 2887  for (;;)
2887              }              }
2888            else            else
2889              {              {
2890              const uschar *p = start_subject + local_offsets[0];  #ifdef SUPPORT_UTF
2891              const uschar *pp = start_subject + local_offsets[1];              if (utf)
2892              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;                {
2893                  const pcre_uchar *p = start_subject + local_offsets[0];
2894                  const pcre_uchar *pp = start_subject + local_offsets[1];
2895                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2896                  }
2897    #endif
2898              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2899              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
2900                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
2901              }              }
   
2902            }            }
2903          else if (rc != PCRE_ERROR_NOMATCH) return rc;          else if (rc != PCRE_ERROR_NOMATCH) return rc;
2904          }          }
# Line 1981  for (;;) Line 2909  for (;;)
2909        /* Handle callouts */        /* Handle callouts */
2910    
2911        case OP_CALLOUT:        case OP_CALLOUT:
2912        if (pcre_callout != NULL)        rrc = 0;
2913          if (PUBL(callout) != NULL)
2914          {          {
2915          int rrc;          PUBL(callout_block) cb;
         pcre_callout_block cb;  
2916          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2917          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2918          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2919    #ifdef COMPILE_PCRE8
2920          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2921          cb.subject_length   = end_subject - start_subject;  #else
2922          cb.start_match      = current_subject - start_subject;          cb.subject          = (PCRE_SPTR16)start_subject;
2923          cb.current_position = ptr - start_subject;  #endif
2924            cb.subject_length   = (int)(end_subject - start_subject);
2925            cb.start_match      = (int)(current_subject - start_subject);
2926            cb.current_position = (int)(ptr - start_subject);
2927          cb.pattern_position = GET(code, 2);          cb.pattern_position = GET(code, 2);
2928          cb.next_item_length = GET(code, 2 + LINK_SIZE);          cb.next_item_length = GET(code, 2 + LINK_SIZE);
2929          cb.capture_top      = 1;          cb.capture_top      = 1;
2930          cb.capture_last     = -1;          cb.capture_last     = -1;
2931          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2932          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          cb.mark             = NULL;   /* No (*MARK) support */
2933          if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); }          if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2934          }          }
2935          if (rrc == 0)
2936            { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
2937        break;        break;
2938    
2939    
# Line 2015  for (;;) Line 2949  for (;;)
2949    /* We have finished the processing at the current subject character. If no    /* We have finished the processing at the current subject character. If no
2950    new states have been set for the next character, we have found all the    new states have been set for the next character, we have found all the
2951    matches that we are going to find. If we are at the top level and partial    matches that we are going to find. If we are at the top level and partial
2952    matching has been requested, check for appropriate conditions. */    matching has been requested, check for appropriate conditions.
2953    
2954      The "forced_fail" variable counts the number of (*F) encountered for the
2955      character. If it is equal to the original active_count (saved in
2956      workspace[1]) it means that (*F) was found on every active state. In this
2957      case we don't want to give a partial match.
2958    
2959      The "could_continue" variable is true if a state could have continued but
2960      for the fact that the end of the subject was reached. */
2961    
2962    if (new_count <= 0)    if (new_count <= 0)
2963      {      {
2964      if (match_count < 0 &&                     /* No matches found */      if (rlevel == 1 &&                               /* Top level, and */
2965          rlevel == 1 &&                         /* Top level match function */          could_continue &&                            /* Some could go on, and */
2966          (md->moptions & PCRE_PARTIAL) != 0 &&  /* Want partial matching */          forced_fail != workspace[1] &&               /* Not all forced fail & */
2967          ptr >= end_subject &&                  /* Reached end of subject */          (                                            /* either... */
2968          ptr > current_subject)                 /* Matched non-empty string */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
2969            ||                                           /* or... */
2970            ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
2971             match_count < 0)                            /* no matches */
2972            ) &&                                         /* And... */
2973            (
2974            partial_newline ||                           /* Either partial NL */
2975              (                                          /* or ... */
2976              ptr >= end_subject &&                /* End of subject and */
2977              ptr > md->start_used_ptr)            /* Inspected non-empty string */
2978              )
2979            )
2980        {        {
2981        if (offsetcount >= 2)        if (offsetcount >= 2)
2982          {          {
2983          offsets[0] = current_subject - start_subject;          offsets[0] = (int)(md->start_used_ptr - start_subject);
2984          offsets[1] = end_subject - start_subject;          offsets[1] = (int)(end_subject - start_subject);
2985          }          }
2986        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
2987        }        }
# Line 2082  Returns:          > 0 => number of match Line 3035  Returns:          > 0 => number of match
3035                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3036  */  */
3037    
3038  PCRE_EXP_DEFN int  #ifdef COMPILE_PCRE8
3039    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3040  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3041    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3042    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3043    #else
3044    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3045    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3046      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3047      int offsetcount, int *workspace, int wscount)
3048    #endif
3049  {  {
3050  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3051  dfa_match_data match_block;  dfa_match_data match_block;
3052  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3053  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3054  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;
   
 pcre_study_data internal_study;  
3055  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
 real_pcre internal_re;  
3056    
3057  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3058  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3059  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3060  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3061  int first_byte = -1;  pcre_uchar first_char = 0;
3062  int req_byte = -1;  pcre_uchar first_char2 = 0;
3063  int req_byte2 = -1;  pcre_uchar req_char = 0;
3064    pcre_uchar req_char2 = 0;
3065  int newline;  int newline;
3066    
3067  /* Plausibility checks */  /* Plausibility checks */
# Line 2113  if (re == NULL || subject == NULL || wor Line 3071  if (re == NULL || subject == NULL || wor
3071     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3072  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3073  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3074    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3075    
3076  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3077  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3078  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3079  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3080    
3081    if (re->magic_number != MAGIC_NUMBER)
3082      return re->magic_number == REVERSED_MAGIC_NUMBER?
3083        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3084    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3085    
3086    /* If restarting after a partial match, do some sanity checks on the contents
3087    of the workspace. */
3088    
3089    if ((options & PCRE_DFA_RESTART) != 0)
3090      {
3091      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3092        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3093          return PCRE_ERROR_DFA_BADRESTART;
3094      }
3095    
3096    /* Set up study, callout, and table data */
3097    
3098  md->tables = re->tables;  md->tables = re->tables;
3099  md->callout_data = NULL;  md->callout_data = NULL;
# Line 2136  if (extra_data != NULL) Line 3112  if (extra_data != NULL)
3112      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3113    }    }
3114    
 /* Check that the first field in the block is the magic number. If it is not,  
 test for a regex that was compiled on a host of opposite endianness. If this is  
 the case, flipped values are put in internal_re and internal_study if there was  
 study data too. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   {  
   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);  
   if (re == NULL) return PCRE_ERROR_BADMAGIC;  
   if (study != NULL) study = &internal_study;  
   }  
   
3115  /* Set some local values */  /* Set some local values */
3116    
3117  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3118  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3119  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3120    
3121  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3122  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3123    utf = (re->options & PCRE_UTF8) != 0;
3124  #else  #else
3125  utf8 = FALSE;  utf = FALSE;
3126  #endif  #endif
3127    
3128  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 2165  anchored = (options & (PCRE_ANCHORED|PCR Line 3130  anchored = (options & (PCRE_ANCHORED|PCR
3130    
3131  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3132    
3133  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3134      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3135  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3136  md->end_subject = end_subject;  md->end_subject = end_subject;
3137    md->start_offset = start_offset;
3138  md->moptions = options;  md->moptions = options;
3139  md->poptions = re->options;  md->poptions = re->options;
3140    
3141    /* If the BSR option is not set at match time, copy what was set
3142    at compile time. */
3143    
3144    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
3145      {
3146      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
3147        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
3148    #ifdef BSR_ANYCRLF
3149      else md->moptions |= PCRE_BSR_ANYCRLF;
3150    #endif
3151      }
3152    
3153  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
3154  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
3155    
# Line 2179  switch ((((options & PCRE_NEWLINE_BITS) Line 3157  switch ((((options & PCRE_NEWLINE_BITS)
3157           PCRE_NEWLINE_BITS)           PCRE_NEWLINE_BITS)
3158    {    {
3159    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
3160    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
3161    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
3162    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
3163         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
3164    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
3165    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
3166    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
# Line 2215  else Line 3193  else
3193  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3194  back the character offset. */  back the character offset. */
3195    
3196  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3197  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3198    {    {
3199    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    int erroroffset;
3200      return PCRE_ERROR_BADUTF8;    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3201    if (start_offset > 0 && start_offset < length)    if (errorcode != 0)
3202      {      {
3203      int tb = ((uschar *)subject)[start_offset];      if (offsetcount >= 2)
     if (tb > 127)  
3204        {        {
3205        tb &= 0xc0;        offsets[0] = erroroffset;
3206        if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;        offsets[1] = errorcode;
3207        }        }
3208        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
3209          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3210      }      }
3211      if (start_offset > 0 && start_offset < length &&
3212            NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3213        return PCRE_ERROR_BADUTF8_OFFSET;
3214    }    }
3215  #endif  #endif
3216    
# Line 2236  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3218  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3218  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3219  in other programs later. */  in other programs later. */
3220    
3221  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3222    
3223  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3224  used in a loop when finding where to start. */  where to start. */
3225    
3226  lcc = md->tables + lcc_offset;  startline = (re->flags & PCRE_STARTLINE) != 0;
 startline = (re->options & PCRE_STARTLINE) != 0;  
3227  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3228    
3229  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 2253  studied, there may be a bitmap of possib Line 3234  studied, there may be a bitmap of possib
3234    
3235  if (!anchored)  if (!anchored)
3236    {    {
3237    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3238      {      {
3239      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3240      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
3241        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3242          {
3243          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3244    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3245          if (utf && first_char > 127)
3246            first_char2 = UCD_OTHERCASE(first_char);
3247    #endif
3248          }
3249      }      }
3250    else    else
3251      {      {
3252      if (startline && study != NULL &&      if (!startline && study != NULL &&
3253           (study->options & PCRE_STUDY_MAPPED) != 0)           (study->flags & PCRE_STUDY_MAPPED) != 0)
3254        start_bits = study->start_bits;        start_bits = study->start_bits;
3255      }      }
3256    }    }
# Line 2270  if (!anchored) Line 3258  if (!anchored)
3258  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
3259  character" set. */  character" set. */
3260    
3261  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3262    {    {
3263    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3264    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3265    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3266        {
3267        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3268    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3269        if (utf && req_char > 127)
3270          req_char2 = UCD_OTHERCASE(req_char);
3271    #endif
3272        }
3273    }    }
3274    
3275  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
3276  failed match. Unless restarting, optimize by moving to the first match  failed match. If not restarting, perform certain optimizations at the start of
3277  character if possible, when not anchored. Then unless wanting a partial match,  a match. */
 check for a required later character. */  
3278    
3279  for (;;)  for (;;)
3280    {    {
# Line 2288  for (;;) Line 3282  for (;;)
3282    
3283    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3284      {      {
3285      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3286    
3287      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* If firstline is TRUE, the start of the match is constrained to the first
3288      start of the match is constrained to the first line of a multiline string.      line of a multiline string. Implement this by temporarily adjusting
3289      Implement this by temporarily adjusting end_subject so that we stop      end_subject so that we stop scanning at a newline. If the match fails at
3290      scanning at a newline. If the match fails at the newline, later code breaks      the newline, later code breaks this loop. */
     this loop. */  
3291    
3292      if (firstline)      if (firstline)
3293        {        {
3294        const uschar *t = current_subject;        PCRE_PUCHAR t = current_subject;
3295    #ifdef SUPPORT_UTF
3296          if (utf)
3297            {
3298            while (t < md->end_subject && !IS_NEWLINE(t))
3299              {
3300              t++;
3301              ACROSSCHAR(t < end_subject, *t, t++);
3302              }
3303            }
3304          else
3305    #endif
3306        while (t < md->end_subject && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3307        end_subject = t;        end_subject = t;
3308        }        }
3309    
3310      if (first_byte >= 0)      /* There are some optimizations that avoid running the match if a known
3311        starting point is not found. However, there is an option that disables
3312        these, for testing and for ensuring that all callouts do actually occur.
3313        The option can be set in the regex by (*NO_START_OPT) or passed in
3314        match-time options. */
3315    
3316        if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3317        {        {
3318        if (first_byte_caseless)        /* Advance to a known first char. */
         while (current_subject < end_subject &&  
                lcc[*current_subject] != first_byte)  
           current_subject++;  
       else  
         while (current_subject < end_subject && *current_subject != first_byte)  
           current_subject++;  
       }  
3319    
3320      /* Or to just after a linebreak for a multiline match if possible */        if (has_first_char)
3321            {
3322            if (first_char != first_char2)
3323              while (current_subject < end_subject &&
3324                  *current_subject != first_char && *current_subject != first_char2)
3325                current_subject++;
3326            else
3327              while (current_subject < end_subject &&
3328                     *current_subject != first_char)
3329                current_subject++;
3330            }
3331    
3332      else if (startline)        /* Or to just after a linebreak for a multiline match if possible */
3333        {  
3334        if (current_subject > md->start_subject + start_offset)        else if (startline)
3335          {          {
3336          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))          if (current_subject > md->start_subject + start_offset)
3337            current_subject++;            {
3338    #ifdef SUPPORT_UTF
3339              if (utf)
3340                {
3341                while (current_subject < end_subject &&
3342                       !WAS_NEWLINE(current_subject))
3343                  {
3344                  current_subject++;
3345                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3346                    current_subject++);
3347                  }
3348                }
3349              else
3350    #endif
3351              while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
3352                current_subject++;
3353    
3354              /* If we have just passed a CR and the newline option is ANY or
3355              ANYCRLF, and we are now at a LF, advance the match position by one
3356              more character. */
3357    
3358          /* If we have just passed a CR and the newline option is ANY or            if (current_subject[-1] == CHAR_CR &&
3359          ANYCRLF, and we are now at a LF, advance the match position by one more                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3360          character. */                 current_subject < end_subject &&
3361                   *current_subject == CHAR_NL)
3362          if (current_subject[-1] == '\r' &&              current_subject++;
3363               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&            }
              current_subject < end_subject &&  
              *current_subject == '\n')  
           current_subject++;  
3364          }          }
       }  
3365    
3366      /* Or to a non-unique first char after study */        /* Or to a non-unique first char after study */
3367    
3368      else if (start_bits != NULL)        else if (start_bits != NULL)
       {  
       while (current_subject < end_subject)  
3369          {          {
3370          register unsigned int c = *current_subject;          while (current_subject < end_subject)
3371          if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;            {
3372              register unsigned int c = *current_subject;
3373    #ifndef COMPILE_PCRE8
3374              if (c > 255) c = 255;
3375    #endif
3376              if ((start_bits[c/8] & (1 << (c&7))) == 0)
3377                {
3378                current_subject++;
3379    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3380                /* In non 8-bit mode, the iteration will stop for
3381                characters > 255 at the beginning or not stop at all. */
3382                if (utf)
3383                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3384                    current_subject++);
3385    #endif
3386                }
3387            else break;            else break;
3388              }
3389          }          }
3390        }        }
3391    
3392      /* Restore fudged end_subject */      /* Restore fudged end_subject */
3393    
3394      end_subject = save_end_subject;      end_subject = save_end_subject;
     }  
   
   /* If req_byte is set, we know that that character must appear in the subject  
   for the match to succeed. If the first character is set, req_byte must be  
   later in the subject; otherwise the test starts at the match point. This  
   optimization can save a huge amount of work in patterns with nested unlimited  
   repeats that aren't going to match. Writing separate code for cased/caseless  
   versions makes it go faster, as does using an autoincrement and backing off  
   on a match.  
   
   HOWEVER: when the subject string is very, very long, searching to its end can  
   take a long time, and give bad performance on quite ordinary patterns. This  
   showed up when somebody was matching /^C/ on a 32-megabyte string... so we  
   don't do this when the string is sufficiently long.  
   
   ALSO: this processing is disabled when partial matching is requested.  
   */  
   
   if (req_byte >= 0 &&  
       end_subject - current_subject < REQ_BYTE_MAX &&  
       (options & PCRE_PARTIAL) == 0)  
     {  
     register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);  
3395    
3396      /* We don't need to repeat the search if we haven't yet reached the      /* The following two optimizations are disabled for partial matching or if
3397      place we found it at last time. */      disabling is explicitly requested (and of course, by the test above, this
3398        code is not obeyed when restarting after a partial match). */
3399    
3400      if (p > req_byte_ptr)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3401            (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3402        {        {
3403        if (req_byte_caseless)        /* If the pattern was studied, a minimum subject length may be set. This
3404          {        is a lower bound; there may be no string of that length that does match the
3405          while (p < end_subject)        pattern. Although the value is, strictly, in characters, we treat it as
3406            {        bytes to avoid spending too much time in this optimization. */
3407            register int pp = *p++;  
3408            if (pp == req_byte || pp == req_byte2) { p--; break; }        if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
3409            }            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3410          }          return PCRE_ERROR_NOMATCH;
3411        else  
3412          /* If req_char is set, we know that that character must appear in the
3413          subject for the match to succeed. If the first character is set, req_char
3414          must be later in the subject; otherwise the test starts at the match
3415          point. This optimization can save a huge amount of work in patterns with
3416          nested unlimited repeats that aren't going to match. Writing separate
3417          code for cased/caseless versions makes it go faster, as does using an
3418          autoincrement and backing off on a match.
3419    
3420          HOWEVER: when the subject string is very, very long, searching to its end
3421          can take a long time, and give bad performance on quite ordinary
3422          patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3423          string... so we don't do this when the string is sufficiently long. */
3424    
3425          if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3426          {          {
3427          while (p < end_subject)          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3428    
3429            /* We don't need to repeat the search if we haven't yet reached the
3430            place we found it at last time. */
3431    
3432            if (p > req_char_ptr)
3433            {            {
3434            if (*p++ == req_byte) { p--; break; }            if (req_char != req_char2)
3435            }              {
3436          }              while (p < end_subject)
3437                  {
3438                  register int pp = *p++;
3439                  if (pp == req_char || pp == req_char2) { p--; break; }
3440                  }
3441                }
3442              else
3443                {
3444                while (p < end_subject)
3445                  {
3446                  if (*p++ == req_char) { p--; break; }
3447                  }
3448                }
3449    
3450        /* If we can't find the required character, break the matching loop,            /* If we can't find the required character, break the matching loop,
3451        which will cause a return of PCRE_ERROR_NOMATCH. */            which will cause a return of PCRE_ERROR_NOMATCH. */
3452    
3453        if (p >= end_subject) break;            if (p >= end_subject) break;
3454    
3455        /* If we have found the required character, save the point where we            /* If we have found the required character, save the point where we
3456        found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3457        the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3458    
3459        req_byte_ptr = p;            req_char_ptr = p;
3460              }
3461            }
3462        }        }
3463      }      }   /* End of optimizations that are done when not restarting */
3464    
3465    /* OK, now we can do the business */    /* OK, now we can do the business */
3466    
3467      md->start_used_ptr = current_subject;
3468      md->recursive = NULL;
3469    
3470    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3471      md,                                /* fixed match data */      md,                                /* fixed match data */
3472      md->start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */
# Line 2419  for (;;) Line 3476  for (;;)
3476      offsetcount,                       /* size of same */      offsetcount,                       /* size of same */
3477      workspace,                         /* workspace vector */      workspace,                         /* workspace vector */
3478      wscount,                           /* size of same */      wscount,                           /* size of same */
3479      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      0);                                /* function recurse level */
     0,                                 /* function recurse level */  
     0);                                /* regex recurse level */  
3480    
3481    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3482    on only if not anchored. */    on only if not anchored. */
# Line 2433  for (;;) Line 3488  for (;;)
3488    
3489    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3490    current_subject++;    current_subject++;
3491    if (utf8)  #ifdef SUPPORT_UTF
3492      if (utf)
3493      {      {
3494      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3495        current_subject++;        current_subject++);
3496      }      }
3497    #endif
3498    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3499    
3500    /* If we have just passed a CR and the newline option is CRLF or ANY or    /* If we have just passed a CR and we are now at a LF, and the pattern does
3501    ANYCRLF, and we are now at a LF, advance the match position by one more    not contain any explicit matches for \r or \n, and the newline option is CRLF
3502    character. */    or ANY or ANYCRLF, advance the match position by one more character. */
3503    
3504    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == CHAR_CR &&
3505         (md->nltype == NLTYPE_ANY ||        current_subject < end_subject &&
3506          md->nltype == NLTYPE_ANYCRLF ||        *current_subject == CHAR_NL &&
3507          md->nllen == 2) &&        (re->flags & PCRE_HASCRORLF) == 0 &&
3508         current_subject < end_subject &&          (md->nltype == NLTYPE_ANY ||
3509         *current_subject == '\n')           md->nltype == NLTYPE_ANYCRLF ||
3510             md->nllen == 2))
3511      current_subject++;      current_subject++;
3512    
3513    }   /* "Bumpalong" loop */    }   /* "Bumpalong" loop */

Legend:
Removed from v.168  
changed lines
  Added in v.1041

  ViewVC Help
Powered by ViewVC 1.1.5