# Diff of /code/trunk/pcre_dfa_exec.c

revision 518 by ph10, Tue May 18 15:47:01 2010 UTC revision 1011 by ph10, Sat Aug 25 11:36:15 2012 UTC
7  below for why this module is different).  below for why this module is different).
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
# Line 113  small value. Non-zero values in the tabl Line 112  small value. Non-zero values in the tabl
112  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
113  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
115  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
116    0,                             /* End                                    */    0,                             /* End                                    */
117    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
118    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 121  static const uschar coptable[] = { Line 120  static const uschar coptable[] = {
120    0, 0,                          /* \P, \p                                 */    0, 0,                          /* \P, \p                                 */
121    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
122    0,                             /* \X                                     */    0,                             /* \X                                     */
123    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
124    1,                             /* Char                                   */    1,                             /* Char                                   */
125    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
126    1,                             /* not                                    */    1,                             /* not                                    */
127      1,                             /* noti                                   */
128    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
129    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
130    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
131    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
132      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
133      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
134      1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
135      1+IMM2_SIZE,                   /* exact I                                */
136      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
137    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
138    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
139    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
140    1, 1, 1, 3,                    /* NOT *+, ++, ?+, updo+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
141      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
142      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
143      1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
144      1+IMM2_SIZE,                   /* NOT exact I                            */
145      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
146    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
148    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
149    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
150      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
151    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
152    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
153    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 144  static const uschar coptable[] = { Line 155  static const uschar coptable[] = {
155    0,                             /* NCLASS                                 */    0,                             /* NCLASS                                 */
156    0,                             /* XCLASS - variable length               */    0,                             /* XCLASS - variable length               */
157    0,                             /* REF                                    */    0,                             /* REF                                    */
158      0,                             /* REFI                                   */
159    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
160    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
161    0,                             /* Alt                                    */    0,                             /* Alt                                    */
162    0,                             /* Ket                                    */    0,                             /* Ket                                    */
163    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
164    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
165      0,                             /* KetRpos                                */
166      0,                             /* Reverse                                */
167    0,                             /* Assert                                 */    0,                             /* Assert                                 */
168    0,                             /* Assert not                             */    0,                             /* Assert not                             */
169    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
170    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
171    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
172    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
173    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
174    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
175    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
176    0,                             /* DEF                                    */    0,                             /* DEF                                    */
177    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
178    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
179    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
180    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
181      0, 0                           /* CLOSE, SKIPZERO  */
182  };  };
184  /* This table identifies those opcodes that inspect a character. It is used to  /* This table identifies those opcodes that inspect a character. It is used to
# Line 171  remember the fact that a character could Line 186  remember the fact that a character could
186  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
187  two tables that follow must also be modified. */  two tables that follow must also be modified. */
189  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
190    0,                             /* End                                    */    0,                             /* End                                    */
191    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
192    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
# Line 179  static const uschar poptable[] = { Line 194  static const uschar poptable[] = {
194    1, 1,                          /* \P, \p                                 */    1, 1,                          /* \P, \p                                 */
195    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
196    1,                             /* \X                                     */    1,                             /* \X                                     */
197    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
198    1,                             /* Char                                   */    1,                             /* Char                                   */
199    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
200    1,                             /* not                                    */    1,                             /* not                                    */
201      1,                             /* noti                                   */
202    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
203    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
204    1, 1, 1,                       /* upto, minupto, exact                   */    1, 1, 1,                       /* upto, minupto, exact                   */
205    1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */    1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
206      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
207      1, 1, 1,                       /* upto I, minupto I, exact I             */
208      1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
209    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
210    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
211    1, 1, 1,                       /* NOT upto, minupto, exact               */    1, 1, 1,                       /* NOT upto, minupto, exact               */
212    1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */    1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
213      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
214      1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
215      1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
216    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
217    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
218    1, 1, 1,                       /* Type upto, minupto, exact              */    1, 1, 1,                       /* Type upto, minupto, exact              */
# Line 202  static const uschar poptable[] = { Line 224  static const uschar poptable[] = {
224    1,                             /* NCLASS                                 */    1,                             /* NCLASS                                 */
225    1,                             /* XCLASS - variable length               */    1,                             /* XCLASS - variable length               */
226    0,                             /* REF                                    */    0,                             /* REF                                    */
227      0,                             /* REFI                                   */
228    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
229    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
230    0,                             /* Alt                                    */    0,                             /* Alt                                    */
231    0,                             /* Ket                                    */    0,                             /* Ket                                    */
232    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
233    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
234      0,                             /* KetRpos                                */
235      0,                             /* Reverse                                */
236    0,                             /* Assert                                 */    0,                             /* Assert                                 */
237    0,                             /* Assert not                             */    0,                             /* Assert not                             */
238    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
239    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
240    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
241    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
242    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
243    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
244    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
245    0,                             /* DEF                                    */    0,                             /* DEF                                    */
246    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
247    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
248    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
249    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
250      0, 0                           /* CLOSE, SKIPZERO                        */
251  };  };
252
253  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
254  and \w */  and \w */
256  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
257    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
258    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
259    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 235  static const uschar toptable1[] = {
0, 0                            /* OP_ANY, OP_ALLANY */
};
261    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
262  };  };
264  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
265    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
266    ctype_digit, 0,    ctype_digit, 0,
267    ctype_space, 0,    ctype_space, 0,
# Line 252  these structures in, is a vector of ints Line 278  these structures in, is a vector of ints
278  typedef struct stateblock {  typedef struct stateblock {
279    int offset;                     /* Offset to opcode */    int offset;                     /* Offset to opcode */
280    int count;                      /* Count for repeats */    int count;                      /* Count for repeats */
int ims;                        /* ims flag bits */
281    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
282  } stateblock;  } stateblock;
284  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
287  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 275  Returns:       nothing
*/

static void
300  */  */
301
302  static void  static void
303  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
304  {  {
305  int c;  int c;
306  while (length-- > 0)  while (length-- > 0)
# Line 308  Arguments: Line 333  Arguments:
333    offsetcount       size of same    offsetcount       size of same
334    workspace         vector of workspace    workspace         vector of workspace
335    wscount           size of same    wscount           size of same
336    rlevel            function call recursion level    rlevel            function call recursion level
recursing         regex recursive call level
338  Returns:            > 0 => number of match offset pairs placed in offsets  Returns:            > 0 => number of match offset pairs placed in offsets
339                      = 0 => offsets overflowed; longest matches are present                      = 0 => offsets overflowed; longest matches are present
# Line 325  for the current character, one for the f
    { \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
348      { \      { \
349      next_active_state->offset = (x); \      next_active_state->offset = (x); \
350      next_active_state->count  = (y); \      next_active_state->count  = (y); \
next_active_state->ims    = ims; \
351      next_active_state++; \      next_active_state++; \
352      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
353      } \      } \
# Line 336  for the current character, one for the f
    { \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
358      { \      { \
359      next_active_state->offset = (x); \      next_active_state->offset = (x); \
360      next_active_state->count  = (y); \      next_active_state->count  = (y); \
next_active_state->ims    = ims; \
361      next_active_state->data   = (z); \      next_active_state->data   = (z); \
362      next_active_state++; \      next_active_state++; \
363      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
# Line 348  for the current character, one for the f
    { \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
369      { \      { \
370      next_new_state->offset = (x); \      next_new_state->offset = (x); \
371      next_new_state->count  = (y); \      next_new_state->count  = (y); \
next_new_state->ims    = ims; \
372      next_new_state++; \      next_new_state++; \
373      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
374      } \      } \
# Line 359  for the current character, one for the f
    { \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
379      { \      { \
380      next_new_state->offset = (x); \      next_new_state->offset = (x); \
381      next_new_state->count  = (y); \      next_new_state->count  = (y); \
next_new_state->ims    = ims; \
382      next_new_state->data   = (z); \      next_new_state->data   = (z); \
383      next_new_state++; \      next_new_state++; \
384      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
385          (x), (y), (z), __LINE__)); \
386      } \      } \
387    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
# Line 371  for the current character, one for the f
static int
internal_dfa_exec(
  dfa_match_data *md,
391  static int  static int
392  internal_dfa_exec(  internal_dfa_exec(
393    dfa_match_data *md,    dfa_match_data *md,
394    const uschar *this_start_code,    const pcre_uchar *this_start_code,
395    const uschar *current_subject,    const pcre_uchar *current_subject,
396    int start_offset,    int start_offset,
397    int *offsets,    int *offsets,
398    int offsetcount,    int offsetcount,
399    int *workspace,    int *workspace,
400    int wscount,    int wscount,
401    int ims,    int  rlevel)
int  rlevel,
int  recursing)
402  {  {
403  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
404  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
406  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
407  const uschar *ptr;  const pcre_uchar *ptr;
408  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
410    dfa_recursion_info new_recursive;
411
412  int active_count, new_count, match_count;  int active_count, new_count, match_count;
413
414  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
415  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
416
417  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
418  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
419  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
420
421  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
422  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
423  #else  #else
424  BOOL utf8 = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
427    BOOL reset_could_continue = FALSE;
428
429  rlevel++;  rlevel++;
430  offsetcount &= (-2);  offsetcount &= (-2);
431
# Line 412  wscount = (wscount - (wscount % (INTS_PE
          (2 * INTS_PER_STATEBLOCK);

DPRINTF(("\n%.*s---------------------\n"
435
437    "%.*sCall to internal_dfa_exec f=%d r=%d\n",    "%.*sCall to internal_dfa_exec f=%d\n",
438    rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));    rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
439
440  ctypes = md->tables + ctypes_offset;  ctypes = md->tables + ctypes_offset;
441  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
# Line 426  next_new_state = new_states = active_sta
new_count = 0;

first_op = this_start_code + 1 + LINK_SIZE +
448  new_count = 0;  new_count = 0;
449
450  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
451    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
452        *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
453        ? IMM2_SIZE:0);
454
455  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
456  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 454  if (*first_op == OP_REVERSE) Line 478  if (*first_op == OP_REVERSE)
478    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
479    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
480
481  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
482    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
483
484    if (utf8)    if (utf)
485      {      {
486      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
487        {        {
488        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
489        current_subject--;        current_subject--;
490        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
(*current_subject & 0xc0) == 0x80)
current_subject--;
491        }        }
492      }      }
493    else    else
# Line 475  if (*first_op == OP_REVERSE)

    {
    gone_back = (current_subject - max_back < start_subject)?
497
498      {      {
499      gone_back = (current_subject - max_back < start_subject)?      gone_back = (current_subject - max_back < start_subject)?
500        current_subject - start_subject : max_back;        (int)(current_subject - start_subject) : max_back;
501      current_subject -= gone_back;      current_subject -= gone_back;
502      }      }
503
# Line 492  if (*first_op == OP_REVERSE)

    if (back <= gone_back)
      {
516        {        {
517        int bstate = end_code - start_code + 2 + 2*LINK_SIZE;        int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
519        }        }
520      end_code += GET(end_code, 1);      end_code += GET(end_code, 1);
# Line 525  else Line 547  else
547    else    else
548      {      {
549      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
550        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
551            *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
552            ? IMM2_SIZE:0);
553      do      do
554        {        {
555        ADD_NEW(end_code - start_code + length, 0);        ADD_NEW((int)(end_code - start_code + length), 0);
556        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
558        }        }
# Line 538  else Line 562  else
563  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
564
565  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
566
567  /* Loop for scanning the subject */  /* Loop for scanning the subject */
568
# Line 549  for (;;) Line 573  for (;;)
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    unsigned int c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
577      BOOL could_continue = reset_could_continue;
578      reset_could_continue = FALSE;
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
# Line 565  for (;;) Line 591  for (;;)
591
592  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
593    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
594    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
595    printf("\"\n");    printf("\"\n");
596
597    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 585  for (;;)

  if (ptr < end_subject)
    {
611
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
615  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
616      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
617  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
618      c = *ptr;      c = *ptr;
619      }      }
# Line 605  for (;;) Line 631  for (;;)
631    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
632      {      {
633      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
635        const pcre_uchar *code;
636      int state_offset = current_state->offset;      int state_offset = current_state->offset;
637      int count, codevalue, rrc;      int count, codevalue, rrc;
638
# Line 616  for (;;) Line 643  for (;;)
643          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
644  #endif  #endif
645
646      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
647      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
648      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
649        state, arrange for it to be passed on. */
651      if (state_offset < 0)      if (state_offset < 0)
652        {        {
# Line 631  for (;;) Line 655  for (;;)
657            current_state->data - 1);            current_state->data - 1);
658            if (could_continue) reset_could_continue = TRUE;
660          }          }
661        else        else
# Line 670  for (;;) Line 695  for (;;)
695      permitted.      permitted.
696
697      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
698      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
699      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
700      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
701      */      these ones to new opcodes. */
702
703      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
704        {        {
705        dlen = 1;        dlen = 1;
706  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
707        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
709        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
710        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
711          {          {
# Line 725  for (;;) Line 750  for (;;)
750
751  /* ========================================================================== */  /* ========================================================================== */
752        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
753        on with the next opcode. Otherwise, unless we have an empty string and        on with the next opcode. For repeating opcodes, also add the repeat
754          state. Note that KETRPOS will always be encountered at the end of the
755          subpattern, because the possessive subpattern repeats are always handled
756          using recursive calls. Thus, it never adds any new states.
757
758          At the end of the (sub)pattern, unless we have an empty string and
759        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
760        start of the subject, save the match data, shifting up all previous        start of the subject, save the match data, shifting up all previous
761        matches so we always have the longest first. */        matches so we always have the longest first. */
# Line 733  for (;;) Line 763  for (;;)
763        case OP_KET:        case OP_KET:
764        case OP_KETRMIN:        case OP_KETRMIN:
765        case OP_KETRMAX:        case OP_KETRMAX:
766          case OP_KETRPOS:
767        if (code != end_code)        if (code != end_code)
768          {          {
# Line 749  for (;;) Line 780  for (;;)
780                  current_subject > start_subject + md->start_offset)))                  current_subject > start_subject + md->start_offset)))
781            {            {
782            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
783              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
784                match_count = 0;                match_count = 0;
785            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
786            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
787            if (offsetcount >= 2)            if (offsetcount >= 2)
788              {              {
789              offsets[0] = current_subject - start_subject;              offsets[0] = (int)(current_subject - start_subject);
790              offsets[1] = ptr - start_subject;              offsets[1] = (int)(ptr - start_subject);
791              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
792                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
793              }              }
794            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
795              {              {
# Line 778  for (;;) Line 809  for (;;)
809        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
810        case OP_ALT:        case OP_ALT:
811        do { code += GET(code, 1); } while (*code == OP_ALT);        do { code += GET(code, 1); } while (*code == OP_ALT);
813        break;        break;
814
815        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 786  for (;;) Line 817  for (;;)
817        case OP_SBRA:        case OP_SBRA:
818        do        do
819          {          {
821          code += GET(code, 1);          code += GET(code, 1);
822          }          }
823        while (*code == OP_ALT);        while (*code == OP_ALT);
# Line 795  for (;;) Line 826  for (;;)
826        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
827        case OP_CBRA:        case OP_CBRA:
828        case OP_SCBRA:        case OP_SCBRA:
830        code += GET(code, 1);        code += GET(code, 1);
831        while (*code == OP_ALT)        while (*code == OP_ALT)
832          {          {
834          code += GET(code, 1);          code += GET(code, 1);
835          }          }
836        break;        break;
# Line 810  for (;;) Line 841  for (;;)
842        code += 1 + GET(code, 2);        code += 1 + GET(code, 2);
843        while (*code == OP_ALT) code += GET(code, 1);        while (*code == OP_ALT) code += GET(code, 1);
845        break;        break;
846
847        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
848        case OP_SKIPZERO:        case OP_SKIPZERO:
849        code += 1 + GET(code, 2);        code += 1 + GET(code, 2);
850        while (*code == OP_ALT) code += GET(code, 1);        while (*code == OP_ALT) code += GET(code, 1);
852        break;        break;
854        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
855        case OP_CIRC:        case OP_CIRC:
856        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
((ims & PCRE_MULTILINE) != 0 &&
ptr != end_subject &&
WAS_NEWLINE(ptr)))
857          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
858        break;        break;
859
860        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
861        case OP_EOD:        case OP_CIRCM:
862        if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
863              (ptr != end_subject && WAS_NEWLINE(ptr)))
864            { ADD_ACTIVE(state_offset + 1, 0); }
865        break;        break;
867        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
868        case OP_OPT:        case OP_EOD:
869        ims = code[1];        if (ptr >= end_subject)
870        ADD_ACTIVE(state_offset + 2, 0);          {
871            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
872              could_continue = TRUE;
873            else { ADD_ACTIVE(state_offset + 1, 0); }
874            }
875        break;        break;
877        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 860  for (;;) Line 894  for (;;)
894        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
895        case OP_ANY:        case OP_ANY:
896        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
897          { ADD_NEW(state_offset + 1, 0); }          {
898            if (ptr + 1 >= md->end_subject &&
899                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
900                NLBLOCK->nltype == NLTYPE_FIXED &&
901                NLBLOCK->nllen == 2 &&
902                c == NLBLOCK->nl[0])
903              {
904              could_continue = partial_newline = TRUE;
905              }
906            else
907              {
909              }
910            }
911        break;        break;
913        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 871  for (;;) Line 918  for (;;)
918
919        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
920        case OP_EODN:        case OP_EODN:
921        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))        if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
922            could_continue = TRUE;
923          else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
924          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
925        break;        break;
# Line 879  for (;;) Line 928  for (;;)
928        case OP_DOLL:        case OP_DOLL:
929        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
930          {          {
931          if (clen == 0 ||          if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
932              could_continue = TRUE;
933            else if (clen == 0 ||
934              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
935                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
936              ))              ))
937            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
938            else if (ptr + 1 >= md->end_subject &&
939                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
940                     NLBLOCK->nltype == NLTYPE_FIXED &&
941                     NLBLOCK->nllen == 2 &&
942                     c == NLBLOCK->nl[0])
943              {
944              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
945                {
946                reset_could_continue = TRUE;
947                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
948                }
949              else could_continue = partial_newline = TRUE;
950              }
951          }          }
953
954          /*-----------------------------------------------------------------*/
955          case OP_DOLLM:
956          if ((md->moptions & PCRE_NOTEOL) == 0)
957            {
958            if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
959              could_continue = TRUE;
960            else if (clen == 0 ||
961                ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
962              { ADD_ACTIVE(state_offset + 1, 0); }
963            else if (ptr + 1 >= md->end_subject &&
964                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
965                     NLBLOCK->nltype == NLTYPE_FIXED &&
966                     NLBLOCK->nllen == 2 &&
967                     c == NLBLOCK->nl[0])
968              {
969              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
970                {
971                reset_could_continue = TRUE;
972                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
973                }
974              else could_continue = partial_newline = TRUE;
975              }
976            }
977          else if (IS_NEWLINE(ptr))
978          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
979        break;        break;
# Line 916  for (;;) Line 1005  for (;;)
1005
1006          if (ptr > start_subject)          if (ptr > start_subject)
1007            {            {
1008            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1009            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1010  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1011            if (utf8) BACKCHAR(temp);            if (utf) { BACKCHAR(temp); }
1012  #endif  #endif
1013            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
1014  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1015            if ((md->poptions & PCRE_UCP) != 0)            if ((md->poptions & PCRE_UCP) != 0)
1016              {              {
1017              if (d == '_') left_word = TRUE; else              if (d == '_') left_word = TRUE; else
1018                {                {
1019                int cat = UCD_CATEGORY(d);                int cat = UCD_CATEGORY(d);
1020                left_word = (cat == ucp_L || cat == ucp_N);                left_word = (cat == ucp_L || cat == ucp_N);
1021                }                }
1022              }              }
1023            else            else
1024  #endif  #endif
1025            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
1026            }            }
1027          else left_word = FALSE;          else left_word = FALSE;
1029          if (clen > 0)          if (clen > 0)
1030            {            {
1031  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1032            if ((md->poptions & PCRE_UCP) != 0)            if ((md->poptions & PCRE_UCP) != 0)
1033              {              {
1034              if (c == '_') right_word = TRUE; else              if (c == '_') right_word = TRUE; else
1035                {                {
1036                int cat = UCD_CATEGORY(c);                int cat = UCD_CATEGORY(c);
1037                right_word = (cat == ucp_L || cat == ucp_N);                right_word = (cat == ucp_L || cat == ucp_N);
1038                }                }
1039              }              }
1040            else            else
1041  #endif  #endif
1042            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
1043            }            }
1044          else right_word = FALSE;          else right_word = FALSE;
1045
# Line 979  for (;;) Line 1068  for (;;)
1068            break;            break;
1069
1070            case PT_LAMP:            case PT_LAMP:
1071            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1072                 prop->chartype == ucp_Lt;                 prop->chartype == ucp_Lt;
1073            break;            break;
1074
1075            case PT_GC:            case PT_GC:
1076            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1077            break;            break;
1078
1079            case PT_PC:            case PT_PC:
# Line 994  for (;;) Line 1083  for (;;)
1083            case PT_SC:            case PT_SC:
1084            OK = prop->script == code[2];            OK = prop->script == code[2];
1085            break;            break;
1086
1087            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1088
1089            case PT_ALNUM:            case PT_ALNUM:
1090            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1091                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1092            break;            break;
1093
1094            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1095            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1096                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1097            break;            break;
1098
1099            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1100            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1101                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1102                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1103            break;            break;
1104
1105            case PT_WORD:            case PT_WORD:
1106            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1107                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1108                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1109            break;            break;
1110
1112
# Line 1046  for (;;) Line 1135  for (;;)
1135        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1136        if (clen > 0)        if (clen > 0)
1137          {          {
1138          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1139                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1140                NLBLOCK->nltype == NLTYPE_FIXED &&
1141                NLBLOCK->nllen == 2 &&
1142                c == NLBLOCK->nl[0])
1143              {
1144              could_continue = partial_newline = TRUE;
1145              }
1146            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1147              (c < 256 &&              (c < 256 &&
1148                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1149                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1069  for (;;) Line 1166  for (;;)
1167        if (clen > 0)        if (clen > 0)
1168          {          {
1169          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1170                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1171                NLBLOCK->nltype == NLTYPE_FIXED &&
1172                NLBLOCK->nllen == 2 &&
1173                c == NLBLOCK->nl[0])
1174              {
1175              could_continue = partial_newline = TRUE;
1176              }
1177            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1178              (c < 256 &&              (c < 256 &&
1179                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1180                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1091  for (;;)

      if (clen > 0)
        {
1198          {          {
1199          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1200                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1201                NLBLOCK->nltype == NLTYPE_FIXED &&
1202                NLBLOCK->nllen == 2 &&
1203                c == NLBLOCK->nl[0])
1204              {
1205              could_continue = partial_newline = TRUE;
1206              }
1207            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1208              (c < 256 &&              (c < 256 &&
1209                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1210                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1111  for (;;) Line 1224  for (;;)
1224        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1225        if (clen > 0)        if (clen > 0)
1226          {          {
1227          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1228                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1229                NLBLOCK->nltype == NLTYPE_FIXED &&
1230                NLBLOCK->nllen == 2 &&
1231                c == NLBLOCK->nl[0])
1232              {
1233              could_continue = partial_newline = TRUE;
1234              }
1235            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1236              (c < 256 &&              (c < 256 &&
1237                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1238                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1240            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1241              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1242            else            else
1244            }            }
# Line 1128  for (;;) Line 1249  for (;;)
1249        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1250        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1251        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
# Line 1128  for (;;)
      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEPOSUPTO:
1254        if (clen > 0)        if (clen > 0)
1255          {          {
1256          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1257                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1258                NLBLOCK->nltype == NLTYPE_FIXED &&
1259                NLBLOCK->nllen == 2 &&
1260                c == NLBLOCK->nl[0])
1261              {
1262              could_continue = partial_newline = TRUE;
1263              }
1264            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1265              (c < 256 &&              (c < 256 &&
1266                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1267                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1143  for (;;) Line 1272  for (;;)
1272              next_active_state--;              next_active_state--;
1273              }              }
1274            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1275              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1276            else            else
1278            }            }
1302            break;            break;
1303
1304            case PT_LAMP:            case PT_LAMP:
1305            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1306              prop->chartype == ucp_Lt;              prop->chartype == ucp_Lt;
1307            break;            break;
1308
1309            case PT_GC:            case PT_GC:
1310            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1311            break;            break;
1312
1313            case PT_PC:            case PT_PC:
# Line 1190  for (;;) Line 1319  for (;;)
1319            break;            break;
1320
1321            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1322
1323            case PT_ALNUM:            case PT_ALNUM:
1324            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1325                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1326            break;            break;
1327
1328            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1329            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1330                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1331            break;            break;
1332
1333            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1334            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1335                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1336                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1337            break;            break;
1338
1339            case PT_WORD:            case PT_WORD:
1340            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1341                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1342                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1343            break;            break;
1344
1345            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1346
# Line 1239  for (;;) Line 1368  for (;;)
1368        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1369        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1370        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1371        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1372          {          {
1373          const uschar *nptr = ptr + clen;          int lgb, rgb;
1374            const pcre_uchar *nptr = ptr + clen;
1375          int ncount = 0;          int ncount = 0;
1376          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1377            {            {
1378            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1379            next_active_state--;            next_active_state--;
1380            }            }
1381            lgb = UCD_GRAPHBREAK(c);
1382          while (nptr < end_subject)          while (nptr < end_subject)
1383            {            {
1384            int nd;            dlen = 1;
1385            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1386            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1387            if (UCD_CATEGORY(nd) != ucp_M) break;            if (PRIV(ucp_gbtable)[lgb * ucp_gbCount + rgb] == 0) break;
1388            ncount++;            ncount++;
1389            nptr += ndlen;            lgb = rgb;
1390              nptr += dlen;
1391            }            }
1392          count++;          count++;
# Line 1420  for (;;) Line 1552  for (;;)
1552            break;            break;
1553
1554            case PT_LAMP:            case PT_LAMP:
1555            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1556              prop->chartype == ucp_Lt;              prop->chartype == ucp_Lt;
1557            break;            break;
1558
1559            case PT_GC:            case PT_GC:
1560            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1561            break;            break;
1562
1563            case PT_PC:            case PT_PC:
# Line 1435  for (;;) Line 1567  for (;;)
1567            case PT_SC:            case PT_SC:
1568            OK = prop->script == code[3];            OK = prop->script == code[3];
1569            break;            break;
1570
1571            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1572
1573            case PT_ALNUM:            case PT_ALNUM:
1574            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1575                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1576            break;            break;
1577
1578            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1579            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1580                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1581            break;            break;
1582
1583            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1584            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1585                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1586                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1587            break;            break;
1588
1589            case PT_WORD:            case PT_WORD:
1590            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1591                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1592                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1593            break;            break;
1594
1595            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1596
# Line 1495  for (;;)
      QS2:
1627        QS2:        QS2:
1628
1630        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1631          {          {
1632          const uschar *nptr = ptr + clen;          int lgb, rgb;
1633            const pcre_uchar *nptr = ptr + clen;
1634          int ncount = 0;          int ncount = 0;
1635          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1636              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1505  for (;;) Line 1638  for (;;)
1638            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1639            next_active_state--;            next_active_state--;
1640            }            }
1641            lgb = UCD_GRAPHBREAK(c);
1642          while (nptr < end_subject)          while (nptr < end_subject)
1643            {            {
1644            int nd;            dlen = 1;
1645            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1646            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1647            if (UCD_CATEGORY(nd) != ucp_M) break;            if (PRIV(ucp_gbtable)[lgb * ucp_gbCount + rgb] == 0) break;
1648            ncount++;            ncount++;
1649            nptr += ndlen;            lgb = rgb;
1650              nptr += dlen;
1651            }            }
1653          }          }
# Line 1679  for (;;) Line 1814  for (;;)
1814        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1815        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1816        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1817          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1818        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1819        if (clen > 0)        if (clen > 0)
1820          {          {
1821          BOOL OK;          BOOL OK;
1822          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1823          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1824            {            {
1825            case PT_ANY:            case PT_ANY:
1826            OK = TRUE;            OK = TRUE;
1827            break;            break;
1828
1829            case PT_LAMP:            case PT_LAMP:
1830            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1831              prop->chartype == ucp_Lt;              prop->chartype == ucp_Lt;
1832            break;            break;
1833
1834            case PT_GC:            case PT_GC:
1835            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1836            break;            break;
1837
1838            case PT_PC:            case PT_PC:
1839            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1840            break;            break;
1841
1842            case PT_SC:            case PT_SC:
1843            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1844            break;            break;
1845
1846            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1847
1848            case PT_ALNUM:            case PT_ALNUM:
1849            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1850                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1851            break;            break;
1852
1853            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1854            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1855                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1856            break;            break;
1857
1858            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1859            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1860                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1861                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1862            break;            break;
1863
1864            case PT_WORD:            case PT_WORD:
1865            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1866                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1867                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1868            break;            break;
1869
1870            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1871
# Line 1747  for (;;) Line 1882  for (;;)
1882              next_active_state--;              next_active_state--;
1883              }              }
1884            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1885              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1886            else            else
1888            }            }
# Line 1760  for (;;) Line 1895  for (;;)
1895        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1896        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1897        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1898          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1899        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1900        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1901          {          {
1902          const uschar *nptr = ptr + clen;          int lgb, rgb;
1903            const pcre_uchar *nptr = ptr + clen;
1904          int ncount = 0;          int ncount = 0;
1905          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1906            {            {
1907            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1908            next_active_state--;            next_active_state--;
1909            }            }
1910            lgb = UCD_GRAPHBREAK(c);
1911          while (nptr < end_subject)          while (nptr < end_subject)
1912            {            {
1913            int nd;            dlen = 1;
1914            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1915            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1916            if (UCD_CATEGORY(nd) != ucp_M) break;            if (PRIV(ucp_gbtable)[lgb * ucp_gbCount + rgb] == 0) break;
1917            ncount++;            ncount++;
1918            nptr += ndlen;            lgb = rgb;
1919              nptr += dlen;
1920            }            }
1921            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1922                reset_could_continue = TRUE;
1923          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1924            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1925          else          else
1927          }          }
# Line 1794  for (;;) Line 1934  for (;;)
1934        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1935        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1936        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1937          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1938        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1939        if (clen > 0)        if (clen > 0)
1940          {          {
# Line 1821  for (;;) Line 1961  for (;;)
1961              next_active_state--;              next_active_state--;
1962              }              }
1963            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1964              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1965            else            else
1967            break;            break;
# Line 1838  for (;;) Line 1978  for (;;)
1978        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1979        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1980        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1981          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1982        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1983        if (clen > 0)        if (clen > 0)
1984          {          {
# Line 1867  for (;;) Line 2007  for (;;)
2007              next_active_state--;              next_active_state--;
2008              }              }
2009            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2010              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2011            else            else
2013            }            }
# Line 1880  for (;;) Line 2020  for (;;)
2020        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2021        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2022        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2023          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2024        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2025        if (clen > 0)        if (clen > 0)
2026          {          {
# Line 1922  for (;;) Line 2062  for (;;)
2062              next_active_state--;              next_active_state--;
2063              }              }
2064            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2065              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2066            else            else
2068            }            }
# Line 1941  for (;;) Line 2081  for (;;)
2081        break;        break;
2082
2083        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2084        case OP_CHARNC:        case OP_CHARI:
2085        if (clen == 0) break;        if (clen == 0) break;
2086
2087  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2088        if (utf8)        if (utf)
2089          {          {
2090          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2091            {            {
2092            unsigned int othercase;            unsigned int othercase;
2093            if (c < 128) othercase = fcc[c]; else            if (c < 128)
2094                othercase = fcc[c];
2095            /* If we have Unicode property support, we can use it to test the            else
2096            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2097                other case of the character. */
2098  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2099            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2100  #else  #else
2101            othercase = NOTACHAR;              othercase = NOTACHAR;
2102  #endif  #endif
2103
2104            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2105            }            }
2106          }          }
2107        else        else
2108  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2109          /* Not UTF mode */
/* Non-UTF-8 mode */
2110          {          {
2111          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2112              { ADD_NEW(state_offset + 2, 0); }
2113          }          }
2114        break;        break;
2115
# Line 1981  for (;;) Line 2121  for (;;)
2121        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2122
2123        case OP_EXTUNI:        case OP_EXTUNI:
2124        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2125          {          {
2126          const uschar *nptr = ptr + clen;          int lgb, rgb;
2127            const pcre_uchar *nptr = ptr + clen;
2128          int ncount = 0;          int ncount = 0;
2129            lgb = UCD_GRAPHBREAK(c);
2130          while (nptr < end_subject)          while (nptr < end_subject)
2131            {            {
2132            int nclen = 1;            dlen = 1;
2133            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2134            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2135              if (PRIV(ucp_gbtable)[lgb * ucp_gbCount + rgb] == 0) break;
2136            ncount++;            ncount++;
2137            nptr += nclen;            lgb = rgb;
2138              nptr += dlen;
2139            }            }
2140            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2141                reset_could_continue = TRUE;
2143          }          }
2144        break;        break;
# Line 2018  for (;;) Line 2164  for (;;)
2164          break;          break;
2165
2166          case 0x000d:          case 0x000d:
2167          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2168              {
2170              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2171                reset_could_continue = TRUE;
2172              }
2173            else if (ptr[1] == 0x0a)
2174            {            {
2176            }            }
# Line 2127  for (;;) Line 2279  for (;;)
2279        break;        break;
2280
2281        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2282        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character casefully. */
characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */
2283
2284        case OP_NOT:        case OP_NOT:
2285          if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2286          break;
2287
2288          /*-----------------------------------------------------------------*/
2289          /* Match a negated single character caselessly. */
2290
2291          case OP_NOTI:
2292        if (clen > 0)        if (clen > 0)
2293          {          {
2294          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd;
2295          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }  #ifdef SUPPORT_UTF
2296            if (utf && d >= 128)
2297              {
2298    #ifdef SUPPORT_UCP
2299              otherd = UCD_OTHERCASE(d);
2300    #endif  /* SUPPORT_UCP */
2301              }
2302            else
2303    #endif  /* SUPPORT_UTF */
2304            otherd = TABLE_GET(d, fcc, d);
2305            if (c != d && c != otherd)
2306              { ADD_NEW(state_offset + dlen + 1, 0); }
2307          }          }
2308        break;        break;
2309
2310        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2311          case OP_PLUSI:
2312          case OP_MINPLUSI:
2313          case OP_POSPLUSI:
2314          case OP_NOTPLUSI:
2315          case OP_NOTMINPLUSI:
2316          case OP_NOTPOSPLUSI:
2317          caseless = TRUE;
2318          codevalue -= OP_STARI - OP_STAR;
2319
2320          /* Fall through */
2321        case OP_PLUS:        case OP_PLUS:
2322        case OP_MINPLUS:        case OP_MINPLUS:
2323        case OP_POSPLUS:        case OP_POSPLUS:
# Line 2151  for (;;) Line 2329  for (;;)
2329        if (clen > 0)        if (clen > 0)
2330          {          {
2331          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2332          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2333            {            {
2334  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2335            if (utf8 && d >= 128)            if (utf && d >= 128)
2336              {              {
2337  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2338              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2339  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2340              }              }
2341            else            else
2342  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2343            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2344            }            }
2345          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2346            {            {
# Line 2179  for (;;) Line 2357  for (;;)
2357        break;        break;
2358
2359        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2360          case OP_QUERYI:
2361          case OP_MINQUERYI:
2362          case OP_POSQUERYI:
2363          case OP_NOTQUERYI:
2364          case OP_NOTMINQUERYI:
2365          case OP_NOTPOSQUERYI:
2366          caseless = TRUE;
2367          codevalue -= OP_STARI - OP_STAR;
2368          /* Fall through */
2369        case OP_QUERY:        case OP_QUERY:
2370        case OP_MINQUERY:        case OP_MINQUERY:
2371        case OP_POSQUERY:        case OP_POSQUERY:
# Line 2189  for (;;) Line 2376  for (;;)
2376        if (clen > 0)        if (clen > 0)
2377          {          {
2378          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2379          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2380            {            {
2381  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2382            if (utf8 && d >= 128)            if (utf && d >= 128)
2383              {              {
2384  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2385              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2386  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2387              }              }
2388            else            else
2389  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2390            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2391            }            }
2392          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2393            {            {
# Line 2215  for (;;) Line 2402  for (;;)
2402        break;        break;
2403
2404        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2405          case OP_STARI:
2406          case OP_MINSTARI:
2407          case OP_POSSTARI:
2408          case OP_NOTSTARI:
2409          case OP_NOTMINSTARI:
2410          case OP_NOTPOSSTARI:
2411          caseless = TRUE;
2412          codevalue -= OP_STARI - OP_STAR;
2413          /* Fall through */
2414        case OP_STAR:        case OP_STAR:
2415        case OP_MINSTAR:        case OP_MINSTAR:
2416        case OP_POSSTAR:        case OP_POSSTAR:
# Line 2225  for (;;) Line 2421  for (;;)
2421        if (clen > 0)        if (clen > 0)
2422          {          {
2423          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2424          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2425            {            {
2426  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2427            if (utf8 && d >= 128)            if (utf && d >= 128)
2428              {              {
2429  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2430              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2431  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2432              }              }
2433            else            else
2434  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2435            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2436            }            }
2437          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2438            {            {
# Line 2251  for (;;) Line 2447  for (;;)
2447        break;        break;
2448
2449        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2450          case OP_EXACTI:
2451          case OP_NOTEXACTI:
2452          caseless = TRUE;
2453          codevalue -= OP_STARI - OP_STAR;
2454          /* Fall through */
2455        case OP_EXACT:        case OP_EXACT:
2456        case OP_NOTEXACT:        case OP_NOTEXACT:
2457        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2458        if (clen > 0)        if (clen > 0)
2459          {          {
2460          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2461          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2462            {            {
2463  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2464            if (utf8 && d >= 128)            if (utf && d >= 128)
2465              {              {
2466  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2467              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2468  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2469              }              }
2470            else            else
2471  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2472            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2473            }            }
2474          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2475            {            {
2476            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2477              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2478            else            else
2480            }            }
# Line 2281  for (;;) Line 2482  for (;;)
2482        break;        break;
2483
2484        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2485          case OP_UPTOI:
2486          case OP_MINUPTOI:
2487          case OP_POSUPTOI:
2488          case OP_NOTUPTOI:
2489          case OP_NOTMINUPTOI:
2490          case OP_NOTPOSUPTOI:
2491          caseless = TRUE;
2492          codevalue -= OP_STARI - OP_STAR;
2493          /* Fall through */
2494        case OP_UPTO:        case OP_UPTO:
2495        case OP_MINUPTO:        case OP_MINUPTO:
2496        case OP_POSUPTO:        case OP_POSUPTO:
2497        case OP_NOTUPTO:        case OP_NOTUPTO:
2498        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2499        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2500        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2501        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2502        if (clen > 0)        if (clen > 0)
2503          {          {
2504          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2505          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2506            {            {
2507  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2508            if (utf8 && d >= 128)            if (utf && d >= 128)
2509              {              {
2510  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2511              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2512  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2513              }              }
2514            else            else
2515  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2516            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2517            }            }
2518          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2519            {            {
# Line 2313  for (;;) Line 2523  for (;;)
2523              next_active_state--;              next_active_state--;
2524              }              }
2525            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2526              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2527            else            else
2529            }            }
# Line 2330  for (;;) Line 2540  for (;;)
2540          {          {
2541          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2542          int next_state_offset;          int next_state_offset;
2543          const uschar *ecode;          const pcre_uchar *ecode;
2544
2545          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2546          can set isinclass from it. */          can set isinclass from it. */
2547
2548          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2549            {            {
2550            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2551            if (clen > 0)            if (clen > 0)
2552              {              {
2553              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2554                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2555              }              }
2556            }            }
2557
# Line 2352  for (;;) Line 2562  for (;;)
2562          else          else
2563           {           {
2564           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2565           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2566           }           }
2567
2568          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
2569          points to the byte after the end of the class. If there is a          points to the byte after the end of the class. If there is a
2570          quantifier, this is where it will be. */          quantifier, this is where it will be. */
2571
2572          next_state_offset = ecode - start_code;          next_state_offset = (int)(ecode - start_code);
2573
2574          switch (*ecode)          switch (*ecode)
2575            {            {
# Line 2386  for (;;) Line 2596  for (;;)
2596            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2597            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2598            if (count >= GET2(ecode, 1))            if (count >= GET2(ecode, 1))
2599              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2600            if (isinclass)            if (isinclass)
2601              {              {
2602              int max = GET2(ecode, 3);              int max = GET2(ecode, 1 + IMM2_SIZE);
2603              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2604                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2605              else              else
2607              }              }
# Line 2422  for (;;) Line 2632  for (;;)
2632          int rc;          int rc;
2633          int local_offsets[2];          int local_offsets[2];
2634          int local_workspace[1000];          int local_workspace[1000];
2635          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2636
2637          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2638
# Line 2430  for (;;) Line 2640  for (;;)
2640            md,                                   /* static match data */            md,                                   /* static match data */
2641            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2642            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2643            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2644            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2645            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2646            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2647            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2648            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2649
2650          if (rc == PCRE_ERROR_DFA_UITEM) return rc;          if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2651          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2653          }          }
2654        break;        break;
2655
# Line 2461  for (;;) Line 2669  for (;;)
2670            {            {
2671            rrc = 0;            rrc = 0;
2672            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2673              {              {
2674              pcre_callout_block cb;              PUBL(callout_block) cb;
2675              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2677              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2678    #ifdef COMPILE_PCRE8
2679              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2680              cb.subject_length   = end_subject - start_subject;  #else
2681              cb.start_match      = current_subject - start_subject;              cb.subject          = (PCRE_SPTR16)start_subject;
2682              cb.current_position = ptr - start_subject;  #endif
2683                cb.subject_length   = (int)(end_subject - start_subject);
2684                cb.start_match      = (int)(current_subject - start_subject);
2685                cb.current_position = (int)(ptr - start_subject);
2686              cb.pattern_position = GET(code, LINK_SIZE + 3);              cb.pattern_position = GET(code, LINK_SIZE + 3);
2687              cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);              cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2688              cb.capture_top      = 1;              cb.capture_top      = 1;
2689              cb.capture_last     = -1;              cb.capture_last     = -1;
2690              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2691              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              cb.mark             = NULL;   /* No (*MARK) support */
2692                if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2693              }              }
2694            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2695            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2696            }            }
2697
# Line 2500  for (;;) Line 2713  for (;;)
2713
2714          else if (condcode == OP_RREF || condcode == OP_NRREF)          else if (condcode == OP_RREF || condcode == OP_NRREF)
2715            {            {
2716            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2717            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2718            if (recursing > 0)            if (md->recursive != NULL)
2721            }            }
2722
# Line 2512  for (;;) Line 2725  for (;;)
2725          else          else
2726            {            {
2727            int rc;            int rc;
2728            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2729            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2730
2731            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2732
# Line 2521  for (;;) Line 2734  for (;;)
2734              md,                                   /* fixed match data */              md,                                   /* fixed match data */
2735              asscode,                              /* this subexpression's code */              asscode,                              /* this subexpression's code */
2736              ptr,                                  /* where we currently are */              ptr,                                  /* where we currently are */
2737              ptr - start_subject,                  /* start offset */              (int)(ptr - start_subject),           /* start offset */
2738              local_offsets,                        /* offset vector */              local_offsets,                        /* offset vector */
2739              sizeof(local_offsets)/sizeof(int),    /* size of same */              sizeof(local_offsets)/sizeof(int),    /* size of same */
2740              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2741              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2742              ims,                                  /* the current ims flags */              rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2743
2744            if (rc == PCRE_ERROR_DFA_UITEM) return rc;            if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2745            if ((rc >= 0) ==            if ((rc >= 0) ==
2746                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2748            else            else
2750            }            }
# Line 2543  for (;;) Line 2754  for (;;)
2754        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2755        case OP_RECURSE:        case OP_RECURSE:
2756          {          {
2757            dfa_recursion_info *ri;
2758          int local_offsets[1000];          int local_offsets[1000];
2759          int local_workspace[1000];          int local_workspace[1000];
2760            const pcre_uchar *callpat = start_code + GET(code, 1);
2761            int recno = (callpat == md->start_code)? 0 :
2763          int rc;          int rc;
2764
2765          DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2766            recursing + 1));
2767            /* Check for repeating a recursion without advancing the subject
2768            pointer. This should catch convoluted mutual recursions. (Some simple
2769            cases are caught at compile time.) */
2770
2771            for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2772              if (recno == ri->group_num && ptr == ri->subject_position)
2773                return PCRE_ERROR_RECURSELOOP;
2774
2775            /* Remember this recursion and where we started it so as to
2776            catch infinite loops. */
2777
2778            new_recursive.group_num = recno;
2779            new_recursive.subject_position = ptr;
2780            new_recursive.prevrec = md->recursive;
2781            md->recursive = &new_recursive;
2782
2783          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2784            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2785            start_code + GET(code, 1),            /* this subexpression's code */            callpat,                              /* this subexpression's code */
2786            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2787            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2788            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2789            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2790            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2791            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2792            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
2793            rlevel,                               /* function recursion level */
2794            recursing + 1);                       /* regex recurse level */          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2795
2796          DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,          DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2797            recursing + 1, rc));            rc));
2798
2799          /* Ran out of internal offsets */          /* Ran out of internal offsets */
2800
# Line 2578  for (;;) Line 2808  for (;;)
2808            {            {
2809            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2810              {              {
const uschar *p = start_subject + local_offsets[rc];
const uschar *pp = start_subject + local_offsets[rc+1];
2811              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2812              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2813                if (utf)
2814                  {
2815                  const pcre_uchar *p = start_subject + local_offsets[rc];
2816                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2817                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2818                  }
2819    #endif
2820              if (charcount > 0)              if (charcount > 0)
2821                {                {
# Line 2597  for (;;) Line 2832  for (;;)
2832        break;        break;
2833
2834        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2835          case OP_BRAPOS:
2836          case OP_SBRAPOS:
2837          case OP_CBRAPOS:
2838          case OP_SCBRAPOS:
2839          case OP_BRAPOSZERO:
2840            {
2841            int charcount, matched_count;
2842            const pcre_uchar *local_ptr = ptr;
2843            BOOL allow_zero;
2844
2845            if (codevalue == OP_BRAPOSZERO)
2846              {
2847              allow_zero = TRUE;
2848              codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2849              }
2850            else allow_zero = FALSE;
2851
2852            /* Loop to match the subpattern as many times as possible as if it were
2853            a complete pattern. */
2854
2855            for (matched_count = 0;; matched_count++)
2856              {
2857              int local_offsets[2];
2858              int local_workspace[1000];
2859
2860              int rc = internal_dfa_exec(
2861                md,                                   /* fixed match data */
2862                code,                                 /* this subexpression's code */
2863                local_ptr,                            /* where we currently are */
2864                (int)(ptr - start_subject),           /* start offset */
2865                local_offsets,                        /* offset vector */
2866                sizeof(local_offsets)/sizeof(int),    /* size of same */
2867                local_workspace,                      /* workspace vector */
2868                sizeof(local_workspace)/sizeof(int),  /* size of same */
2869                rlevel);                              /* function recursion level */
2870
2871              /* Failed to match */
2872
2873              if (rc < 0)
2874                {
2875                if (rc != PCRE_ERROR_NOMATCH) return rc;
2876                break;
2877                }
2878
2879              /* Matched: break the loop if zero characters matched. */
2880
2881              charcount = local_offsets[1] - local_offsets[0];
2882              if (charcount == 0) break;
2883              local_ptr += charcount;    /* Advance temporary position ptr */
2884              }
2885
2886            /* At this point we have matched the subpattern matched_count
2887            times, and local_ptr is pointing to the character after the end of the
2888            last match. */
2889
2890            if (matched_count > 0 || allow_zero)
2891              {
2892              const pcre_uchar *end_subpattern = code;
2893              int next_state_offset;
2894
2895              do { end_subpattern += GET(end_subpattern, 1); }
2896                while (*end_subpattern == OP_ALT);
2897              next_state_offset =
2898                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2899
2900              /* Optimization: if there are no more active states, and there
2901              are no new states yet set up, then skip over the subject string
2902              right here, to save looping. Otherwise, set up the new state to swing
2903              into action when the end of the matched substring is reached. */
2904
2905              if (i + 1 >= active_count && new_count == 0)
2906                {
2907                ptr = local_ptr;
2908                clen = 0;
2910                }
2911              else
2912                {
2913                const pcre_uchar *p = ptr;
2914                const pcre_uchar *pp = local_ptr;
2915                charcount = (int)(pp - p);
2916    #ifdef SUPPORT_UTF
2917                if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2918    #endif
2919                ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2920                }
2921              }
2922            }
2923          break;
2924
2925          /*-----------------------------------------------------------------*/
2926        case OP_ONCE:        case OP_ONCE:
2927          case OP_ONCE_NC:
2928          {          {
2929          int local_offsets[2];          int local_offsets[2];
2930          int local_workspace[1000];          int local_workspace[1000];
# Line 2606  for (;;) Line 2933  for (;;)
2933            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2934            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2935            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2936            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2937            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2938            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2939            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2940            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2941            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2942
2943          if (rc >= 0)          if (rc >= 0)
2944            {            {
2945            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2946            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2947            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2948
2949            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2950              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
2951            next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;            next_state_offset =
2952                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2953
2954            /* If the end of this subpattern is KETRMAX or KETRMIN, we must            /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2955            arrange for the repeat state also to be added to the relevant list.            arrange for the repeat state also to be added to the relevant list.
# Line 2631  for (;;) Line 2957  for (;;)
2957
2958            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2959                                   *end_subpattern == OP_KETRMIN)?                                   *end_subpattern == OP_KETRMIN)?
2960              end_subpattern - start_code - GET(end_subpattern, 1) : -1;              (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
2961
2962            /* If we have matched an empty string, add the next state at the            /* If we have matched an empty string, add the next state at the
2963            current character pointer. This is important so that the duplicate            current character pointer. This is important so that the duplicate
# Line 2646  for (;;) Line 2972  for (;;)
2972            /* Optimization: if there are no more active states, and there            /* Optimization: if there are no more active states, and there
2973            are no new states yet set up, then skip over the subject string            are no new states yet set up, then skip over the subject string
2974            right here, to save looping. Otherwise, set up the new state to swing            right here, to save looping. Otherwise, set up the new state to swing
2975            into action when the end of the substring is reached. */            into action when the end of the matched substring is reached. */
2976
2977            else if (i + 1 >= active_count && new_count == 0)            else if (i + 1 >= active_count && new_count == 0)
2978              {              {
# Line 2669  for (;;) Line 2995  for (;;)
2995              }              }
2996            else            else
2997              {              {
2998              const uschar *p = start_subject + local_offsets[0];  #ifdef SUPPORT_UTF
2999              const uschar *pp = start_subject + local_offsets[1];              if (utf)
3000              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;                {
3001                  const pcre_uchar *p = start_subject + local_offsets[0];
3002                  const pcre_uchar *pp = start_subject + local_offsets[1];
3003                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
3004                  }
3005    #endif
3007              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
3008                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
3009              }              }

3010            }            }
3011          else if (rc != PCRE_ERROR_NOMATCH) return rc;          else if (rc != PCRE_ERROR_NOMATCH) return rc;
3012          }          }
# Line 2688  for (;;) Line 3018  for (;;)
3018
3019        case OP_CALLOUT:        case OP_CALLOUT:
3020        rrc = 0;        rrc = 0;
3021        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
3022          {          {
3023          pcre_callout_block cb;          PUBL(callout_block) cb;
3024          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
3025          cb.callout_number   = code[1];          cb.callout_number   = code[1];
3026          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
3027    #ifdef COMPILE_PCRE8
3028          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
3029          cb.subject_length   = end_subject - start_subject;  #else
3030          cb.start_match      = current_subject - start_subject;          cb.subject          = (PCRE_SPTR16)start_subject;
3031          cb.current_position = ptr - start_subject;  #endif
3032            cb.subject_length   = (int)(end_subject - start_subject);
3033            cb.start_match      = (int)(current_subject - start_subject);
3034            cb.current_position = (int)(ptr - start_subject);
3035          cb.pattern_position = GET(code, 2);          cb.pattern_position = GET(code, 2);
3036          cb.next_item_length = GET(code, 2 + LINK_SIZE);          cb.next_item_length = GET(code, 2 + LINK_SIZE);
3037          cb.capture_top      = 1;          cb.capture_top      = 1;
3038          cb.capture_last     = -1;          cb.capture_last     = -1;
3039          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
3040          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          cb.mark             = NULL;   /* No (*MARK) support */
3041            if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
3042          }          }
3043        if (rrc == 0)        if (rrc == 0)
3044          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3045        break;        break;
3046
3047
# Line 2735  for (;;) Line 3070  for (;;)
3070    if (new_count <= 0)    if (new_count <= 0)
3071      {      {
3072      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3073          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3074          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3075          (                                            /* either... */          (                                            /* either... */
3076          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2743  for (;;) Line 3078  for (;;)
3078          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3079           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3080          ) &&                                         /* And... */          ) &&                                         /* And... */
3081          ptr >= end_subject &&                     /* Reached end of subject */          (
3082          ptr > current_subject)                    /* Matched non-empty string */          partial_newline ||                           /* Either partial NL */
3083              (                                          /* or ... */
3084              ptr >= end_subject &&                /* End of subject and */
3085              ptr > md->start_used_ptr)            /* Inspected non-empty string */
3086              )
3087            )
3088        {        {
3089        if (offsetcount >= 2)        if (offsetcount >= 2)
3090          {          {
3091          offsets[0] = md->start_used_ptr - start_subject;          offsets[0] = (int)(md->start_used_ptr - start_subject);
3092          offsets[1] = end_subject - start_subject;          offsets[1] = (int)(end_subject - start_subject);
3093          }          }
3094        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
3095        }        }
# Line 2803  Returns:          > 0 => number of match Line 3143  Returns:          > 0 => number of match
3143                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3144  */  */
3145
3146    #ifdef COMPILE_PCRE8
3147  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3148  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3149    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3150    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3151    #else
3152    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3153    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3154      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3155      int offsetcount, int *workspace, int wscount)
3156    #endif
3157  {  {
3158  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3159  dfa_match_data match_block;  dfa_match_data match_block;
3160  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3161  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3162  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;

pcre_study_data internal_study;
3163  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
real_pcre internal_re;
3164
3165  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3166  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3167  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3168  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3169  int first_byte = -1;  pcre_uchar first_char = 0;
3170  int req_byte = -1;  pcre_uchar first_char2 = 0;
3171  int req_byte2 = -1;  pcre_uchar req_char = 0;
3172    pcre_uchar req_char2 = 0;
3173  int newline;  int newline;
3174
3175  /* Plausibility checks */  /* Plausibility checks */
# Line 2834  if (re == NULL || subject == NULL || wor Line 3179  if (re == NULL || subject == NULL || wor
3179     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3180  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3181  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3182    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3183
3184  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3185  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3186  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3187  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3188
3189    if (re->magic_number != MAGIC_NUMBER)
3190      return re->magic_number == REVERSED_MAGIC_NUMBER?
3192    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3193
3194    /* If restarting after a partial match, do some sanity checks on the contents
3195    of the workspace. */
3196
3197    if ((options & PCRE_DFA_RESTART) != 0)
3198      {
3199      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3200        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3202      }
3203
3204    /* Set up study, callout, and table data */
3205
3206  md->tables = re->tables;  md->tables = re->tables;
3207  md->callout_data = NULL;  md->callout_data = NULL;
# Line 2857  if (extra_data != NULL) Line 3220  if (extra_data != NULL)
3220      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3221    }    }
3222
/* Check that the first field in the block is the magic number. If it is not,
test for a regex that was compiled on a host of opposite endianness. If this is
the case, flipped values are put in internal_re and internal_study if there was
study data too. */

if (re->magic_number != MAGIC_NUMBER)
{
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
if (re == NULL) return PCRE_ERROR_BADMAGIC;
if (study != NULL) study = &internal_study;
}

3223  /* Set some local values */  /* Set some local values */
3224
3225  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3226  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3227  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3228
3229  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3230  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3231    utf = (re->options & PCRE_UTF8) != 0;
3232  #else  #else
3233  utf8 = FALSE;  utf = FALSE;
3234  #endif  #endif
3235
3236  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 2886  anchored = (options & (PCRE_ANCHORED|PCR Line 3238  anchored = (options & (PCRE_ANCHORED|PCR
3238
3239  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3240
3241  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3242      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3243  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3244  md->end_subject = end_subject;  md->end_subject = end_subject;
3245  md->start_offset = start_offset;  md->start_offset = start_offset;
3246  md->moptions = options;  md->moptions = options;
# Line 2949  else Line 3301  else
3301  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3302  back the character offset. */  back the character offset. */
3303
3304  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3305  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3306    {    {
3307    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    int erroroffset;
3308      return PCRE_ERROR_BADUTF8;    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3309    if (start_offset > 0 && start_offset < length)    if (errorcode != 0)
3310      {      {
3311      int tb = ((uschar *)subject)[start_offset];      if (offsetcount >= 2)
if (tb > 127)
3312        {        {
3313        tb &= 0xc0;        offsets[0] = erroroffset;
3314        if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;        offsets[1] = errorcode;
3315        }        }
3316        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
3317          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3318      }      }
3319      if (start_offset > 0 && start_offset < length &&
3320            NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3321        return PCRE_ERROR_BADUTF8_OFFSET;
3322    }    }
3323  #endif  #endif
3324
# Line 2970  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3326  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3326  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3327  in other programs later. */  in other programs later. */
3328
3329  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3330
3331  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3332  used in a loop when finding where to start. */  where to start. */
3333
lcc = md->tables + lcc_offset;
3334  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3335  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3336
# Line 2989  if (!anchored) Line 3344  if (!anchored)
3344    {    {
3345    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3346      {      {
3347      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3348      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
3349        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3350          {
3351          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3352    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3353          if (utf && first_char > 127)
3354            first_char2 = UCD_OTHERCASE(first_char);
3355    #endif
3356          }
3357      }      }
3358    else    else
3359      {      {
# Line 3006  character" set. */ Line 3368  character" set. */
3368
3369  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3370    {    {
3371    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3372    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3373    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3374        {
3375        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3376    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3377        if (utf && req_char > 127)
3378          req_char2 = UCD_OTHERCASE(req_char);
3379    #endif
3380        }
3381    }    }
3382
3383  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 3021  for (;;) Line 3390  for (;;)
3390
3391    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3392      {      {
3393      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3394
3395      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3396      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 3030  for (;;) Line 3399  for (;;)
3399
3400      if (firstline)      if (firstline)
3401        {        {
3402        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3403  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3404        if (utf8)        if (utf)
3405          {          {
3406          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3407            {            {
3408            t++;            t++;
3409            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3410            }            }
3411          }          }
3412        else        else
# Line 3048  for (;;) Line 3417  for (;;)
3417
3418      /* There are some optimizations that avoid running the match if a known      /* There are some optimizations that avoid running the match if a known
3419      starting point is not found. However, there is an option that disables      starting point is not found. However, there is an option that disables
3420      these, for testing and for ensuring that all callouts do actually occur. */      these, for testing and for ensuring that all callouts do actually occur.
3421        The option can be set in the regex by (*NO_START_OPT) or passed in
3422        match-time options. */
3423
3424      if ((options & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3425        {        {
3426        /* Advance to a known first byte. */        /* Advance to a known first char. */
3427
3428        if (first_byte >= 0)        if (has_first_char)
3429          {          {
3430          if (first_byte_caseless)          if (first_char != first_char2)
3431            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3432                   lcc[*current_subject] != first_byte)                *current_subject != first_char && *current_subject != first_char2)
3433              current_subject++;              current_subject++;
3434          else          else
3435            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3436                   *current_subject != first_byte)                   *current_subject != first_char)
3437              current_subject++;              current_subject++;
3438          }          }
3439
# Line 3072  for (;;) Line 3443  for (;;)
3443          {          {
3444          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3445            {            {
3446  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3447            if (utf8)            if (utf)
3448              {              {
3449              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3450                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3451                {                {
3452                current_subject++;                current_subject++;
3453                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3454                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
current_subject++;
3455                }                }
3456              }              }
3457            else            else
# Line 3108  for (;;) Line 3478  for (;;)
3478          while (current_subject < end_subject)          while (current_subject < end_subject)
3479            {            {
3480            register unsigned int c = *current_subject;            register unsigned int c = *current_subject;
3481            if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;  #ifndef COMPILE_PCRE8
3482              else break;            if (c > 255) c = 255;
3483    #endif
3484              if ((start_bits[c/8] & (1 << (c&7))) == 0)
3485                {
3486                current_subject++;
3487    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3488                /* In non 8-bit mode, the iteration will stop for
3489                characters > 255 at the beginning or not stop at all. */
3490                if (utf)
3491                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3492                    current_subject++);
3493    #endif
3494                }
3495              else break;
3496            }            }
3497          }          }
3498        }        }
# Line 3122  for (;;) Line 3505  for (;;)
3505      disabling is explicitly requested (and of course, by the test above, this      disabling is explicitly requested (and of course, by the test above, this
3506      code is not obeyed when restarting after a partial match). */      code is not obeyed when restarting after a partial match). */
3507
3508      if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3509          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3510        {        {
3511        /* If the pattern was studied, a minimum subject length may be set. This        /* If the pattern was studied, a minimum subject length may be set. This
# Line 3134  for (;;) Line 3517  for (;;)
3517            (pcre_uint32)(end_subject - current_subject) < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3518          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3519
3520        /* If req_byte is set, we know that that character must appear in the        /* If req_char is set, we know that that character must appear in the
3521        subject for the match to succeed. If the first character is set, req_byte        subject for the match to succeed. If the first character is set, req_char
3522        must be later in the subject; otherwise the test starts at the match        must be later in the subject; otherwise the test starts at the match
3523        point. This optimization can save a huge amount of work in patterns with        point. This optimization can save a huge amount of work in patterns with
3524        nested unlimited repeats that aren't going to match. Writing separate        nested unlimited repeats that aren't going to match. Writing separate
# Line 3147  for (;;) Line 3530  for (;;)
3530        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3531        string... so we don't do this when the string is sufficiently long. */        string... so we don't do this when the string is sufficiently long. */
3532
3533        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3534          {          {
3535          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3536
3537          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3538          place we found it at last time. */          place we found it at last time. */
3539
3540          if (p > req_byte_ptr)          if (p > req_char_ptr)
3541            {            {
3542            if (req_byte_caseless)            if (req_char != req_char2)
3543              {              {
3544              while (p < end_subject)              while (p < end_subject)
3545                {                {
3546                register int pp = *p++;                register int pp = *p++;
3547                if (pp == req_byte || pp == req_byte2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3548                }                }
3549              }              }
3550            else            else
3551              {              {
3552              while (p < end_subject)              while (p < end_subject)
3553                {                {
3554                if (*p++ == req_byte) { p--; break; }                if (*p++ == req_char) { p--; break; }
3555                }                }
3556              }              }
3557
# Line 3181  for (;;) Line 3564  for (;;)
3564            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3565            the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3566
3567            req_byte_ptr = p;            req_char_ptr = p;
3568            }            }
3569          }          }
3570        }        }
# Line 3190  for (;;) Line 3573  for (;;)
3573    /* OK, now we can do the business */    /* OK, now we can do the business */
3574
3575    md->start_used_ptr = current_subject;    md->start_used_ptr = current_subject;
3576      md->recursive = NULL;
3577
3578    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3579      md,                                /* fixed match data */      md,                                /* fixed match data */
# Line 3200  for (;;) Line 3584  for (;;)
3584      offsetcount,                       /* size of same */      offsetcount,                       /* size of same */
3585      workspace,                         /* workspace vector */      workspace,                         /* workspace vector */
3586      wscount,                           /* size of same */      wscount,                           /* size of same */
3587      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      0);                                /* function recurse level */
0,                                 /* function recurse level */
0);                                /* regex recurse level */
3588
3589    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3590    on only if not anchored. */    on only if not anchored. */
# Line 3214  for (;;) Line 3596  for (;;)
3596
3597    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3598    current_subject++;    current_subject++;
3599    if (utf8)  #ifdef SUPPORT_UTF
3600      if (utf)
3601      {      {
3602      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3603        current_subject++;        current_subject++);
3604      }      }
3605    #endif
3606    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3607
3608    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does

