# Diff of /code/trunk/pcre_dfa_exec.c

Left column: revision 510 by ph10, Sat Mar 27 17:45:29 2010 UTC. Right column: revision 1364 by ph10, Sat Oct 5 15:45:11 2013 UTC.
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
11
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40

41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45
46
# Line 113  small value. Non-zero values in the tabl Line 112  small value. Non-zero values in the tabl
112  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
113  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
114
115  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
116    0,                             /* End                                    */    0,                             /* End                                    */
117    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
118    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 121  static const uschar coptable[] = { Line 120  static const uschar coptable[] = {
120    0, 0,                          /* \P, \p                                 */    0, 0,                          /* \P, \p                                 */
121    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
122    0,                             /* \X                                     */    0,                             /* \X                                     */
123    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
124    1,                             /* Char                                   */    1,                             /* Char                                   */
125    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
126    1,                             /* not                                    */    1,                             /* not                                    */
127      1,                             /* noti                                   */
128    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
129    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
130    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
131    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
132      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
133      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
134      1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
135      1+IMM2_SIZE,                   /* exact I                                */
136      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
137    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
138    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
139    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
140    1, 1, 1, 3,                    /* NOT *+, ++, ?+, updo+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
141      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
142      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
143      1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
144      1+IMM2_SIZE,                   /* NOT exact I                            */
145      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
146    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
147    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
148    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
149    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
150      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
151    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
152    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
153    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 144  static const uschar coptable[] = { Line 155  static const uschar coptable[] = {
155    0,                             /* NCLASS                                 */    0,                             /* NCLASS                                 */
156    0,                             /* XCLASS - variable length               */    0,                             /* XCLASS - variable length               */
157    0,                             /* REF                                    */    0,                             /* REF                                    */
158      0,                             /* REFI                                   */
159      0,                             /* DNREF                                  */
160      0,                             /* DNREFI                                 */
161    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
162    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
163    0,                             /* Alt                                    */    0,                             /* Alt                                    */
164    0,                             /* Ket                                    */    0,                             /* Ket                                    */
165    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
166    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
167      0,                             /* KetRpos                                */
168      0,                             /* Reverse                                */
169    0,                             /* Assert                                 */    0,                             /* Assert                                 */
170    0,                             /* Assert not                             */    0,                             /* Assert not                             */
171    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
172    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
173    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
174    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
175    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
176    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
177    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
178    0,                             /* DEF                                    */    0,                             /* DEF                                    */
179    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
180    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
181    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
182    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
183      0, 0                           /* CLOSE, SKIPZERO  */
184  };  };
185
186  /* This table identifies those opcodes that inspect a character. It is used to  /* This table identifies those opcodes that inspect a character. It is used to
# Line 171  remember the fact that a character could Line 188  remember the fact that a character could
188  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
189  two tables that follow must also be modified. */  two tables that follow must also be modified. */
190
191  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
192    0,                             /* End                                    */    0,                             /* End                                    */
193    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
194    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
# Line 179  static const uschar poptable[] = { Line 196  static const uschar poptable[] = {
196    1, 1,                          /* \P, \p                                 */    1, 1,                          /* \P, \p                                 */
197    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
198    1,                             /* \X                                     */    1,                             /* \X                                     */
199    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
200    1,                             /* Char                                   */    1,                             /* Char                                   */
201    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
202    1,                             /* not                                    */    1,                             /* not                                    */
203      1,                             /* noti                                   */
204    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
205    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
206    1, 1, 1,                       /* upto, minupto, exact                   */    1, 1, 1,                       /* upto, minupto, exact                   */
207    1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */    1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
208      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
209      1, 1, 1,                       /* upto I, minupto I, exact I             */
210      1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
211    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
212    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
213    1, 1, 1,                       /* NOT upto, minupto, exact               */    1, 1, 1,                       /* NOT upto, minupto, exact               */
214    1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */    1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
215      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
216      1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
217      1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
218    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
219    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
220    1, 1, 1,                       /* Type upto, minupto, exact              */    1, 1, 1,                       /* Type upto, minupto, exact              */
# Line 202  static const uschar poptable[] = { Line 226  static const uschar poptable[] = {
226    1,                             /* NCLASS                                 */    1,                             /* NCLASS                                 */
227    1,                             /* XCLASS - variable length               */    1,                             /* XCLASS - variable length               */
228    0,                             /* REF                                    */    0,                             /* REF                                    */
229      0,                             /* REFI                                   */
230      0,                             /* DNREF                                  */
231      0,                             /* DNREFI                                 */
232    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
233    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
234    0,                             /* Alt                                    */    0,                             /* Alt                                    */
235    0,                             /* Ket                                    */    0,                             /* Ket                                    */
236    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
237    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
238      0,                             /* KetRpos                                */
239      0,                             /* Reverse                                */
240    0,                             /* Assert                                 */    0,                             /* Assert                                 */
241    0,                             /* Assert not                             */    0,                             /* Assert not                             */
242    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
243    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
244    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
245    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
246    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
247    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
248    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
249    0,                             /* DEF                                    */    0,                             /* DEF                                    */
250    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
251    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
252    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
253    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
254      0, 0                           /* CLOSE, SKIPZERO                        */
255  };  };
256
257  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
258  and \w */  and \w */
259
260  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
261    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
262    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
263    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 235  static const uschar toptable1[] = { Line 265  static const uschar toptable1[] = {
265    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
266  };  };
267
268  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
269    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
270    ctype_digit, 0,    ctype_digit, 0,
271    ctype_space, 0,    ctype_space, 0,
# Line 252  these structures in, is a vector of ints Line 282  these structures in, is a vector of ints
282  typedef struct stateblock {  typedef struct stateblock {
283    int offset;                     /* Offset to opcode */    int offset;                     /* Offset to opcode */
284    int count;                      /* Count for repeats */    int count;                      /* Count for repeats */
int ims;                        /* ims flag bits */
285    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
286  } stateblock;  } stateblock;
287
288  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
289
290
291  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 275  Returns:       nothing Line 304  Returns:       nothing
304  */  */
305
306  static void  static void
307  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
308  {  {
309  int c;  pcre_uint32 c;
310  while (length-- > 0)  while (length-- > 0)
311    {    {
312    if (isprint(c = *(p++)))    if (isprint(c = *(p++)))
313      fprintf(f, "%c", c);      fprintf(f, "%c", c);
314    else    else
315      fprintf(f, "\\x%02x", c);      fprintf(f, "\\x{%02x}", c);
316    }    }
317  }  }
318  #endif  #endif
# Line 308  Arguments: Line 337  Arguments:
337    offsetcount       size of same    offsetcount       size of same
338    workspace         vector of workspace    workspace         vector of workspace
339    wscount           size of same    wscount           size of same
ims               the current ims flags
340    rlevel            function call recursion level    rlevel            function call recursion level
recursing         regex recursive call level
341
342  Returns:            > 0 => number of match offset pairs placed in offsets  Returns:            > 0 => number of match offset pairs placed in offsets
343                      = 0 => offsets overflowed; longest matches are present                      = 0 => offsets overflowed; longest matches are present
# Line 325  for the current character, one for the f Line 352  for the current character, one for the f
352      { \      { \
353      next_active_state->offset = (x); \      next_active_state->offset = (x); \
354      next_active_state->count  = (y); \      next_active_state->count  = (y); \
next_active_state->ims    = ims; \
355      next_active_state++; \      next_active_state++; \
356      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
357      } \      } \
# Line 336  for the current character, one for the f Line 362  for the current character, one for the f
362      { \      { \
363      next_active_state->offset = (x); \      next_active_state->offset = (x); \
364      next_active_state->count  = (y); \      next_active_state->count  = (y); \
next_active_state->ims    = ims; \
365      next_active_state->data   = (z); \      next_active_state->data   = (z); \
366      next_active_state++; \      next_active_state++; \
367      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
# Line 348  for the current character, one for the f Line 373  for the current character, one for the f
373      { \      { \
374      next_new_state->offset = (x); \      next_new_state->offset = (x); \
375      next_new_state->count  = (y); \      next_new_state->count  = (y); \
next_new_state->ims    = ims; \
376      next_new_state++; \      next_new_state++; \
377      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
378      } \      } \
# Line 359  for the current character, one for the f Line 383  for the current character, one for the f
383      { \      { \
384      next_new_state->offset = (x); \      next_new_state->offset = (x); \
385      next_new_state->count  = (y); \      next_new_state->count  = (y); \
next_new_state->ims    = ims; \
386      next_new_state->data   = (z); \      next_new_state->data   = (z); \
387      next_new_state++; \      next_new_state++; \
388      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
389          (x), (y), (z), __LINE__)); \
390      } \      } \
391    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
392
# Line 371  for the current character, one for the f Line 395  for the current character, one for the f
395  static int  static int
396  internal_dfa_exec(  internal_dfa_exec(
397    dfa_match_data *md,    dfa_match_data *md,
398    const uschar *this_start_code,    const pcre_uchar *this_start_code,
399    const uschar *current_subject,    const pcre_uchar *current_subject,
400    int start_offset,    int start_offset,
401    int *offsets,    int *offsets,
402    int offsetcount,    int offsetcount,
403    int *workspace,    int *workspace,
404    int wscount,    int wscount,
405    int ims,    int  rlevel)
int  rlevel,
int  recursing)
406  {  {
407  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
408  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
409
410  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
411  const uschar *ptr;  const pcre_uchar *ptr;
412  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
413
414    dfa_recursion_info new_recursive;
415
416  int active_count, new_count, match_count;  int active_count, new_count, match_count;
417
418  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
419  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
420
421  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
422  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
423  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
424
425  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
426  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
427  #else  #else
428  BOOL utf8 = FALSE;  BOOL utf = FALSE;
429  #endif  #endif
430
431    BOOL reset_could_continue = FALSE;
432
433  rlevel++;  rlevel++;
434  offsetcount &= (-2);  offsetcount &= (-2);
435
# Line 412  wscount = (wscount - (wscount % (INTS_PE Line 438  wscount = (wscount - (wscount % (INTS_PE
438            (2 * INTS_PER_STATEBLOCK);            (2 * INTS_PER_STATEBLOCK);
439
440  DPRINTF(("\n%.*s---------------------\n"  DPRINTF(("\n%.*s---------------------\n"
441    "%.*sCall to internal_dfa_exec f=%d r=%d\n",    "%.*sCall to internal_dfa_exec f=%d\n",
442    rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));    rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
443
444  ctypes = md->tables + ctypes_offset;  ctypes = md->tables + ctypes_offset;
445  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
# Line 426  next_new_state = new_states = active_sta Line 452  next_new_state = new_states = active_sta
452  new_count = 0;  new_count = 0;
453
454  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
455    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
456        *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
457        ? IMM2_SIZE:0);
458
459  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
460  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 454  if (*first_op == OP_REVERSE) Line 482  if (*first_op == OP_REVERSE)
482    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
483    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
484
485  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
486    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
487
488    if (utf8)    if (utf)
489      {      {
490      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
491        {        {
492        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
493        current_subject--;        current_subject--;
494        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
(*current_subject & 0xc0) == 0x80)
current_subject--;
495        }        }
496      }      }
497    else    else
# Line 475  if (*first_op == OP_REVERSE) Line 501  if (*first_op == OP_REVERSE)
501
502      {      {
503      gone_back = (current_subject - max_back < start_subject)?      gone_back = (current_subject - max_back < start_subject)?
504        current_subject - start_subject : max_back;        (int)(current_subject - start_subject) : max_back;
505      current_subject -= gone_back;      current_subject -= gone_back;
506      }      }
507
# Line 492  if (*first_op == OP_REVERSE) Line 518  if (*first_op == OP_REVERSE)
519      if (back <= gone_back)      if (back <= gone_back)
520        {        {
521        int bstate = end_code - start_code + 2 + 2*LINK_SIZE;        int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
523        }        }
524      end_code += GET(end_code, 1);      end_code += GET(end_code, 1);
# Line 525  else Line 551  else
551    else    else
552      {      {
553      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
554        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
555            *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
556            ? IMM2_SIZE:0);
557      do      do
558        {        {
559        ADD_NEW(end_code - start_code + length, 0);        ADD_NEW((int)(end_code - start_code + length), 0);
560        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
562        }        }
# Line 538  else Line 566  else
566
567  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
568
569  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
570
571  /* Loop for scanning the subject */  /* Loop for scanning the subject */
572
# Line 547  for (;;) Line 575  for (;;)
575    {    {
576    int i, j;    int i, j;
577    int clen, dlen;    int clen, dlen;
578    unsigned int c, d;    pcre_uint32 c, d;
579    int forced_fail = 0;    int forced_fail = 0;
580    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
581      BOOL could_continue = reset_could_continue;
582      reset_could_continue = FALSE;
583
584    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
585    new state list. */    new state list. */
# Line 565  for (;;) Line 595  for (;;)
595
596  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
597    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
598    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
599    printf("\"\n");    printf("\"\n");
600
601    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 585  for (;;) Line 615  for (;;)
615
616    if (ptr < end_subject)    if (ptr < end_subject)
617      {      {
618      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
619  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
620      if (utf8) { GETCHARLEN(c, ptr, clen); } else      GETCHARLENTEST(c, ptr, clen);
621  #endif  /* SUPPORT_UTF8 */  #else
622      c = *ptr;      c = *ptr;
623    #endif  /* SUPPORT_UTF */
624      }      }
625    else    else
626      {      {
# Line 605  for (;;) Line 636  for (;;)
636    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
637      {      {
638      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
639      const uschar *code;      BOOL caseless = FALSE;
640        const pcre_uchar *code;
641      int state_offset = current_state->offset;      int state_offset = current_state->offset;
642      int count, codevalue, rrc;      int codevalue, rrc;
643        int count;
644
645  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
646      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 616  for (;;) Line 649  for (;;)
649          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
650  #endif  #endif
651
/* This variable is referred to implicitly in the ADD_xxx macros. */

ims = current_state->ims;

652      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
653      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
654      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
655        state, arrange for it to be passed on. */
656
657      if (state_offset < 0)      if (state_offset < 0)
658        {        {
# Line 631  for (;;) Line 661  for (;;)
661          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
663            current_state->data - 1);            current_state->data - 1);
664            if (could_continue) reset_could_continue = TRUE;
665          continue;          continue;
666          }          }
667        else        else
# Line 670  for (;;) Line 701  for (;;)
701      permitted.      permitted.
702
703      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
704      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
705      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
706      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
707      */      these ones to new opcodes. */
708
709      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
710        {        {
711        dlen = 1;        dlen = 1;
712  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
713        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
714  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
715        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
716        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
717          {          {
# Line 725  for (;;) Line 756  for (;;)
756
757  /* ========================================================================== */  /* ========================================================================== */
758        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
759        on with the next opcode. Otherwise, unless we have an empty string and        on with the next opcode. For repeating opcodes, also add the repeat
760          state. Note that KETRPOS will always be encountered at the end of the
761          subpattern, because the possessive subpattern repeats are always handled
762          using recursive calls. Thus, it never adds any new states.
763
764          At the end of the (sub)pattern, unless we have an empty string and
765        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
766        start of the subject, save the match data, shifting up all previous        start of the subject, save the match data, shifting up all previous
767        matches so we always have the longest first. */        matches so we always have the longest first. */
# Line 733  for (;;) Line 769  for (;;)
769        case OP_KET:        case OP_KET:
770        case OP_KETRMIN:        case OP_KETRMIN:
771        case OP_KETRMAX:        case OP_KETRMAX:
772          case OP_KETRPOS:
773        if (code != end_code)        if (code != end_code)
774          {          {
# Line 749  for (;;) Line 786  for (;;)
786                  current_subject > start_subject + md->start_offset)))                  current_subject > start_subject + md->start_offset)))
787            {            {
788            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
789              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
790                match_count = 0;                match_count = 0;
791            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
792            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
793            if (offsetcount >= 2)            if (offsetcount >= 2)
794              {              {
795              offsets[0] = current_subject - start_subject;              offsets[0] = (int)(current_subject - start_subject);
796              offsets[1] = ptr - start_subject;              offsets[1] = (int)(ptr - start_subject);
797              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
798                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
799              }              }
800            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
801              {              {
# Line 778  for (;;) Line 815  for (;;)
815        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
816        case OP_ALT:        case OP_ALT:
817        do { code += GET(code, 1); } while (*code == OP_ALT);        do { code += GET(code, 1); } while (*code == OP_ALT);
819        break;        break;
820
821        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 786  for (;;) Line 823  for (;;)
823        case OP_SBRA:        case OP_SBRA:
824        do        do
825          {          {
827          code += GET(code, 1);          code += GET(code, 1);
828          }          }
829        while (*code == OP_ALT);        while (*code == OP_ALT);
# Line 795  for (;;) Line 832  for (;;)
832        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
833        case OP_CBRA:        case OP_CBRA:
834        case OP_SCBRA:        case OP_SCBRA:
836        code += GET(code, 1);        code += GET(code, 1);
837        while (*code == OP_ALT)        while (*code == OP_ALT)
838          {          {
840          code += GET(code, 1);          code += GET(code, 1);
841          }          }
842        break;        break;
# Line 810  for (;;) Line 847  for (;;)
848        code += 1 + GET(code, 2);        code += 1 + GET(code, 2);
849        while (*code == OP_ALT) code += GET(code, 1);        while (*code == OP_ALT) code += GET(code, 1);
851        break;        break;
852
853        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
854        case OP_SKIPZERO:        case OP_SKIPZERO:
855        code += 1 + GET(code, 2);        code += 1 + GET(code, 2);
856        while (*code == OP_ALT) code += GET(code, 1);        while (*code == OP_ALT) code += GET(code, 1);
858        break;        break;
859
860        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
861        case OP_CIRC:        case OP_CIRC:
862        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
((ims & PCRE_MULTILINE) != 0 &&
ptr != end_subject &&
WAS_NEWLINE(ptr)))
863          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
864        break;        break;
865
866        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
867        case OP_EOD:        case OP_CIRCM:
868        if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
869              (ptr != end_subject && WAS_NEWLINE(ptr)))
870            { ADD_ACTIVE(state_offset + 1, 0); }
871        break;        break;
872
873        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
874        case OP_OPT:        case OP_EOD:
875        ims = code[1];        if (ptr >= end_subject)
876        ADD_ACTIVE(state_offset + 2, 0);          {
877            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
878              could_continue = TRUE;
879            else { ADD_ACTIVE(state_offset + 1, 0); }
880            }
881        break;        break;
882
883        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 860  for (;;) Line 900  for (;;)
900        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
901        case OP_ANY:        case OP_ANY:
902        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
903          { ADD_NEW(state_offset + 1, 0); }          {
904            if (ptr + 1 >= md->end_subject &&
905                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
906                NLBLOCK->nltype == NLTYPE_FIXED &&
907                NLBLOCK->nllen == 2 &&
908                c == NLBLOCK->nl[0])
909              {
910              could_continue = partial_newline = TRUE;
911              }
912            else
913              {
915              }
916            }
917        break;        break;
918
919        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 871  for (;;) Line 924  for (;;)
924
925        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
926        case OP_EODN:        case OP_EODN:
927        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))        if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
928            could_continue = TRUE;
929          else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
930          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
931        break;        break;
932
# Line 879  for (;;) Line 934  for (;;)
934        case OP_DOLL:        case OP_DOLL:
935        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
936          {          {
937          if (clen == 0 ||          if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
938              could_continue = TRUE;
939            else if (clen == 0 ||
940              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
941                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
942              ))              ))
943            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
944            else if (ptr + 1 >= md->end_subject &&
945                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
946                     NLBLOCK->nltype == NLTYPE_FIXED &&
947                     NLBLOCK->nllen == 2 &&
948                     c == NLBLOCK->nl[0])
949              {
950              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
951                {
952                reset_could_continue = TRUE;
953                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
954                }
955              else could_continue = partial_newline = TRUE;
956              }
957            }
958          break;
959
960          /*-----------------------------------------------------------------*/
961          case OP_DOLLM:
962          if ((md->moptions & PCRE_NOTEOL) == 0)
963            {
964            if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
965              could_continue = TRUE;
966            else if (clen == 0 ||
967                ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
968              { ADD_ACTIVE(state_offset + 1, 0); }
969            else if (ptr + 1 >= md->end_subject &&
970                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
971                     NLBLOCK->nltype == NLTYPE_FIXED &&
972                     NLBLOCK->nllen == 2 &&
973                     c == NLBLOCK->nl[0])
974              {
975              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
976                {
977                reset_could_continue = TRUE;
978                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
979                }
980              else could_continue = partial_newline = TRUE;
981              }
982          }          }
983        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
984          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
985        break;        break;
986
# Line 916  for (;;) Line 1011  for (;;)
1011
1012          if (ptr > start_subject)          if (ptr > start_subject)
1013            {            {
1014            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1015            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1016  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1017            if (utf8) BACKCHAR(temp);            if (utf) { BACKCHAR(temp); }
1018  #endif  #endif
1019            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
1020    #ifdef SUPPORT_UCP
1021              if ((md->poptions & PCRE_UCP) != 0)
1022                {
1023                if (d == '_') left_word = TRUE; else
1024                  {
1025                  int cat = UCD_CATEGORY(d);
1026                  left_word = (cat == ucp_L || cat == ucp_N);
1027                  }
1028                }
1029              else
1030    #endif
1031            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
1032            }            }
1033          else left_word = 0;          else left_word = FALSE;
1034
1035          if (clen > 0)          if (clen > 0)
1036              {
1037    #ifdef SUPPORT_UCP
1038              if ((md->poptions & PCRE_UCP) != 0)
1039                {
1040                if (c == '_') right_word = TRUE; else
1041                  {
1042                  int cat = UCD_CATEGORY(c);
1043                  right_word = (cat == ucp_L || cat == ucp_N);
1044                  }
1045                }
1046              else
1047    #endif
1048            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
1049          else right_word = 0;            }
1050            else right_word = FALSE;
1051
1052          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
1053            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 947  for (;;) Line 1066  for (;;)
1066        if (clen > 0)        if (clen > 0)
1067          {          {
1068          BOOL OK;          BOOL OK;
1069            const pcre_uint32 *cp;
1070          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1071          switch(code[1])          switch(code[1])
1072            {            {
# Line 955  for (;;) Line 1075  for (;;)
1075            break;            break;
1076
1077            case PT_LAMP:            case PT_LAMP:
1078            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1079                   prop->chartype == ucp_Lt;
1080            break;            break;
1081
1082            case PT_GC:            case PT_GC:
1083            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1084            break;            break;
1085
1086            case PT_PC:            case PT_PC:
# Line 970  for (;;) Line 1091  for (;;)
1091            OK = prop->script == code[2];            OK = prop->script == code[2];
1092            break;            break;
1093
1094              /* These are specials for combination cases. */
1095
1096              case PT_ALNUM:
1097              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1098                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1099              break;
1100
1101              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1102              which means that Perl space and POSIX space are now identical. PCRE
1103              was changed at release 8.34. */
1104
1105              case PT_SPACE:    /* Perl space */
1106              case PT_PXSPACE:  /* POSIX space */
1107              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1108                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1109                   c == CHAR_FF || c == CHAR_CR;
1110              break;
1111
1112              case PT_WORD:
1113              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1114                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1115                   c == CHAR_UNDERSCORE;
1116              break;
1117
1118              case PT_CLIST:
1119              cp = PRIV(ucd_caseless_sets) + code[2];
1120              for (;;)
1121                {
1122                if (c < *cp) { OK = FALSE; break; }
1123                if (c == *cp++) { OK = TRUE; break; }
1124                }
1125              break;
1126
1127              case PT_UCNC:
1128              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1129                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1130                   c >= 0xe000;
1131              break;
1132
1133            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1134
1135            default:            default:
# Line 997  for (;;) Line 1157  for (;;)
1157        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1158        if (clen > 0)        if (clen > 0)
1159          {          {
1160          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1161                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1162                NLBLOCK->nltype == NLTYPE_FIXED &&
1163                NLBLOCK->nllen == 2 &&
1164                c == NLBLOCK->nl[0])
1165              {
1166              could_continue = partial_newline = TRUE;
1167              }
1168            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1169              (c < 256 &&              (c < 256 &&
1170                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1171                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1020  for (;;) Line 1188  for (;;)
1189        if (clen > 0)        if (clen > 0)
1190          {          {
1191          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1192                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1193                NLBLOCK->nltype == NLTYPE_FIXED &&
1194                NLBLOCK->nllen == 2 &&
1195                c == NLBLOCK->nl[0])
1196              {
1197              could_continue = partial_newline = TRUE;
1198              }
1199            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1200              (c < 256 &&              (c < 256 &&
1201                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1202                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1042  for (;;) Line 1218  for (;;)
1219        if (clen > 0)        if (clen > 0)
1220          {          {
1221          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1222                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1223                NLBLOCK->nltype == NLTYPE_FIXED &&
1224                NLBLOCK->nllen == 2 &&
1225                c == NLBLOCK->nl[0])
1226              {
1227              could_continue = partial_newline = TRUE;
1228              }
1229            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1230              (c < 256 &&              (c < 256 &&
1231                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1232                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1062  for (;;) Line 1246  for (;;)
1246        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1247        if (clen > 0)        if (clen > 0)
1248          {          {
1249          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1250                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1251                NLBLOCK->nltype == NLTYPE_FIXED &&
1252                NLBLOCK->nllen == 2 &&
1253                c == NLBLOCK->nl[0])
1254              {
1255              could_continue = partial_newline = TRUE;
1256              }
1257            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1258              (c < 256 &&              (c < 256 &&
1259                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1260                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1261            {            {
1262            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1263              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1264            else            else
1266            }            }
# Line 1079  for (;;) Line 1271  for (;;)
1271        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1272        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1273        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1275        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1276        if (clen > 0)        if (clen > 0)
1277          {          {
1278          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1279                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1280                NLBLOCK->nltype == NLTYPE_FIXED &&
1281                NLBLOCK->nllen == 2 &&
1282                c == NLBLOCK->nl[0])
1283              {
1284              could_continue = partial_newline = TRUE;
1285              }
1286            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1287              (c < 256 &&              (c < 256 &&
1288                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1289                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1093  for (;;) Line 1293  for (;;)
1293              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1294              next_active_state--;              next_active_state--;
1295              }              }
1296            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1297              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1298            else            else
1300            }            }
# Line 1116  for (;;) Line 1316  for (;;)
1316        if (clen > 0)        if (clen > 0)
1317          {          {
1318          BOOL OK;          BOOL OK;
1319            const pcre_uint32 *cp;
1320          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1321          switch(code[2])          switch(code[2])
1322            {            {
# Line 1124  for (;;) Line 1325  for (;;)
1325            break;            break;
1326
1327            case PT_LAMP:            case PT_LAMP:
1328            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1329                prop->chartype == ucp_Lt;
1330            break;            break;
1331
1332            case PT_GC:            case PT_GC:
1333            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1334            break;            break;
1335
1336            case PT_PC:            case PT_PC:
# Line 1139  for (;;) Line 1341  for (;;)
1341            OK = prop->script == code[3];            OK = prop->script == code[3];
1342            break;            break;
1343
1344              /* These are specials for combination cases. */
1345
1346              case PT_ALNUM:
1347              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1348                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1349              break;
1350
1351              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1352              which means that Perl space and POSIX space are now identical. PCRE
1353              was changed at release 8.34. */
1354
1355              case PT_SPACE:    /* Perl space */
1356              case PT_PXSPACE:  /* POSIX space */
1357              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1358                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1359                   c == CHAR_FF || c == CHAR_CR;
1360              break;
1361
1362              case PT_WORD:
1363              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1364                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1365                   c == CHAR_UNDERSCORE;
1366              break;
1367
1368              case PT_CLIST:
1369              cp = PRIV(ucd_caseless_sets) + code[3];
1370              for (;;)
1371                {
1372                if (c < *cp) { OK = FALSE; break; }
1373                if (c == *cp++) { OK = TRUE; break; }
1374                }
1375              break;
1376
1377              case PT_UCNC:
1378              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1379                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1380                   c >= 0xe000;
1381              break;
1382
1383            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1384
1385            default:            default:
# Line 1165  for (;;) Line 1406  for (;;)
1406        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1407        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1408        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1409        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1410          {          {
1411          const uschar *nptr = ptr + clen;          int lgb, rgb;
1412            const pcre_uchar *nptr = ptr + clen;
1413          int ncount = 0;          int ncount = 0;
1414          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1415            {            {
1416            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1417            next_active_state--;            next_active_state--;
1418            }            }
1419            lgb = UCD_GRAPHBREAK(c);
1420          while (nptr < end_subject)          while (nptr < end_subject)
1421            {            {
1422            int nd;            dlen = 1;
1423            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1424            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1425            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1426            ncount++;            ncount++;
1427            nptr += ndlen;            lgb = rgb;
1428              nptr += dlen;
1429            }            }
1430          count++;          count++;
# Line 1200  for (;;) Line 1444  for (;;)
1444          int ncount = 0;          int ncount = 0;
1445          switch (c)          switch (c)
1446            {            {
1447            case 0x000b:            case CHAR_VT:
1448            case 0x000c:            case CHAR_FF:
1449            case 0x0085:            case CHAR_NEL:
1450    #ifndef EBCDIC
1451            case 0x2028:            case 0x2028:
1452            case 0x2029:            case 0x2029:
1453    #endif  /* Not EBCDIC */
1454            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1455            goto ANYNL01;            goto ANYNL01;
1456
1457            case 0x000d:            case CHAR_CR:
1458            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1459            /* Fall through */            /* Fall through */
1460
1461            ANYNL01:            ANYNL01:
1462            case 0x000a:            case CHAR_LF:
1463            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1464              {              {
1465              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1240  for (;;) Line 1486  for (;;)
1486          BOOL OK;          BOOL OK;
1487          switch (c)          switch (c)
1488            {            {
1489            case 0x000a:            VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
1490            OK = TRUE;            OK = TRUE;
1491            break;            break;
1492
# Line 1279  for (;;) Line 1519  for (;;)
1519          BOOL OK;          BOOL OK;
1520          switch (c)          switch (c)
1521            {            {
1522            case 0x09:      /* HT */            HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
1523            OK = TRUE;            OK = TRUE;
1524            break;            break;
1525
# Line 1338  for (;;) Line 1560  for (;;)
1560        if (clen > 0)        if (clen > 0)
1561          {          {
1562          BOOL OK;          BOOL OK;
1563            const pcre_uint32 *cp;
1564          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1565          switch(code[2])          switch(code[2])
1566            {            {
# Line 1346  for (;;) Line 1569  for (;;)
1569            break;            break;
1570
1571            case PT_LAMP:            case PT_LAMP:
1572            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1573                prop->chartype == ucp_Lt;
1574            break;            break;
1575
1576            case PT_GC:            case PT_GC:
1577            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1578            break;            break;
1579
1580            case PT_PC:            case PT_PC:
# Line 1361  for (;;) Line 1585  for (;;)
1585            OK = prop->script == code[3];            OK = prop->script == code[3];
1586            break;            break;
1587
1588              /* These are specials for combination cases. */
1589
1590              case PT_ALNUM:
1591              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1592                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1593              break;
1594
1595              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1596              which means that Perl space and POSIX space are now identical. PCRE
1597              was changed at release 8.34. */
1598
1599              case PT_SPACE:    /* Perl space */
1600              case PT_PXSPACE:  /* POSIX space */
1601              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1602                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1603                   c == CHAR_FF || c == CHAR_CR;
1604              break;
1605
1606              case PT_WORD:
1607              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1608                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1609                   c == CHAR_UNDERSCORE;
1610              break;
1611
1612              case PT_CLIST:
1613              cp = PRIV(ucd_caseless_sets) + code[3];
1614              for (;;)
1615                {
1616                if (c < *cp) { OK = FALSE; break; }
1617                if (c == *cp++) { OK = TRUE; break; }
1618                }
1619              break;
1620
1621              case PT_UCNC:
1622              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1623                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1624                   c >= 0xe000;
1625              break;
1626
1627            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1628
1629            default:            default:
# Line 1396  for (;;) Line 1659  for (;;)
1659        QS2:        QS2:
1660
1662        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1663          {          {
1664          const uschar *nptr = ptr + clen;          int lgb, rgb;
1665            const pcre_uchar *nptr = ptr + clen;
1666          int ncount = 0;          int ncount = 0;
1667          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1668              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1406  for (;;) Line 1670  for (;;)
1670            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1671            next_active_state--;            next_active_state--;
1672            }            }
1673            lgb = UCD_GRAPHBREAK(c);
1674          while (nptr < end_subject)          while (nptr < end_subject)
1675            {            {
1676            int nd;            dlen = 1;
1677            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1678            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1679            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1680            ncount++;            ncount++;
1681            nptr += ndlen;            lgb = rgb;
1682              nptr += dlen;
1683            }            }
1685          }          }
# Line 1439  for (;;) Line 1705  for (;;)
1705          int ncount = 0;          int ncount = 0;
1706          switch (c)          switch (c)
1707            {            {
1708            case 0x000b:            case CHAR_VT:
1709            case 0x000c:            case CHAR_FF:
1710            case 0x0085:            case CHAR_NEL:
1711    #ifndef EBCDIC
1712            case 0x2028:            case 0x2028:
1713            case 0x2029:            case 0x2029:
1714    #endif  /* Not EBCDIC */
1715            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1716            goto ANYNL02;            goto ANYNL02;
1717
1718            case 0x000d:            case CHAR_CR:
1719            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1720            /* Fall through */            /* Fall through */
1721
1722            ANYNL02:            ANYNL02:
1723            case 0x000a:            case CHAR_LF:
1724            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1725                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1726              {              {
1727              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1728              next_active_state--;              next_active_state--;
1729              }              }
1731            break;            break;
1732
1733            default:            default:
# Line 1487  for (;;) Line 1755  for (;;)
1755          BOOL OK;          BOOL OK;
1756          switch (c)          switch (c)
1757            {            {
1758            case 0x000a:            VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
1759            OK = TRUE;            OK = TRUE;
1760            break;            break;
1761
# Line 1509  for (;;) Line 1771  for (;;)
1771              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1772              next_active_state--;              next_active_state--;
1773              }              }
1775            }            }
1776          }          }
1777        break;        break;
# Line 1533  for (;;) Line 1795  for (;;)
1795          BOOL OK;          BOOL OK;
1796          switch (c)          switch (c)
1797            {            {
1798            case 0x09:      /* HT */            HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
1799            OK = TRUE;            OK = TRUE;
1800            break;            break;
1801
# Line 1568  for (;;) Line 1812  for (;;)
1812              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1813              next_active_state--;              next_active_state--;
1814              }              }
1816            }            }
1817          }          }
1818        break;        break;
# Line 1580  for (;;) Line 1824  for (;;)
1824        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1825        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1826        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1827          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1828        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1829        if (clen > 0)        if (clen > 0)
1830          {          {
1831          BOOL OK;          BOOL OK;
1832            const pcre_uint32 *cp;
1833          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1834          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1835            {            {
1836            case PT_ANY:            case PT_ANY:
1837            OK = TRUE;            OK = TRUE;
1838            break;            break;
1839
1840            case PT_LAMP:            case PT_LAMP:
1841            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1842                prop->chartype == ucp_Lt;
1843            break;            break;
1844
1845            case PT_GC:            case PT_GC:
1846            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1847            break;            break;
1848
1849            case PT_PC:            case PT_PC:
1850            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1851            break;            break;
1852
1853            case PT_SC:            case PT_SC:
1854            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1855              break;
1856
1857              /* These are specials for combination cases. */
1858
1859              case PT_ALNUM:
1860              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1861                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1862              break;
1863
1864              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1865              which means that Perl space and POSIX space are now identical. PCRE
1866              was changed at release 8.34. */
1867
1868              case PT_SPACE:    /* Perl space */
1869              case PT_PXSPACE:  /* POSIX space */
1870              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1871                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1872                   c == CHAR_FF || c == CHAR_CR;
1873              break;
1874
1875              case PT_WORD:
1876              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1877                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1878                   c == CHAR_UNDERSCORE;
1879              break;
1880
1881              case PT_CLIST:
1882              cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
1883              for (;;)
1884                {
1885                if (c < *cp) { OK = FALSE; break; }
1886                if (c == *cp++) { OK = TRUE; break; }
1887                }
1888              break;
1889
1890              case PT_UCNC:
1891              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1892                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1893                   c >= 0xe000;
1894            break;            break;
1895
1896            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1622  for (;;) Line 1907  for (;;)
1907              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1908              next_active_state--;              next_active_state--;
1909              }              }
1910            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1911              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1912            else            else
1914            }            }
# Line 1636  for (;;) Line 1921  for (;;)
1921        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1922        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1923        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1924          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1925        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1926        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1927          {          {
1928          const uschar *nptr = ptr + clen;          int lgb, rgb;
1929            const pcre_uchar *nptr = ptr + clen;
1930          int ncount = 0;          int ncount = 0;
1931          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1932            {            {
1933            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1934            next_active_state--;            next_active_state--;
1935            }            }
1936            lgb = UCD_GRAPHBREAK(c);
1937          while (nptr < end_subject)          while (nptr < end_subject)
1938            {            {
1939            int nd;            dlen = 1;
1940            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1941            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1942            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1943            ncount++;            ncount++;
1944            nptr += ndlen;            lgb = rgb;
1945              nptr += dlen;
1946            }            }
1947          if (++count >= GET2(code, 1))          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1948            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              reset_could_continue = TRUE;
1949            if (++count >= (int)GET2(code, 1))
1950              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1951          else          else
1953          }          }
# Line 1670  for (;;) Line 1960  for (;;)
1960        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1961        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1962        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1963          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1964        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1965        if (clen > 0)        if (clen > 0)
1966          {          {
1967          int ncount = 0;          int ncount = 0;
1968          switch (c)          switch (c)
1969            {            {
1970            case 0x000b:            case CHAR_VT:
1971            case 0x000c:            case CHAR_FF:
1972            case 0x0085:            case CHAR_NEL:
1973    #ifndef EBCDIC
1974            case 0x2028:            case 0x2028:
1975            case 0x2029:            case 0x2029:
1976    #endif  /* Not EBCDIC */
1977            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1978            goto ANYNL03;            goto ANYNL03;
1979
1980            case 0x000d:            case CHAR_CR:
1981            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1982            /* Fall through */            /* Fall through */
1983
1984            ANYNL03:            ANYNL03:
1985            case 0x000a:            case CHAR_LF:
1986            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1987              {              {
1988              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1989              next_active_state--;              next_active_state--;
1990              }              }
1991            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1992              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1993            else            else
1995            break;            break;
# Line 1714  for (;;) Line 2006  for (;;)
2006        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
2007        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
2008        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
2009          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2010        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2011        if (clen > 0)        if (clen > 0)
2012          {          {
2013          BOOL OK;          BOOL OK;
2014          switch (c)          switch (c)
2015            {            {
2016            case 0x000a:            VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
2017            OK = TRUE;            OK = TRUE;
2018            break;            break;
2019
# Line 1742  for (;;) Line 2028  for (;;)
2028              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2029              next_active_state--;              next_active_state--;
2030              }              }
2031            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2032              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2033            else            else
2035            }            }
# Line 1756  for (;;) Line 2042  for (;;)
2042        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2043        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2044        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2045          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2046        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2047        if (clen > 0)        if (clen > 0)
2048          {          {
2049          BOOL OK;          BOOL OK;
2050          switch (c)          switch (c)
2051            {            {
2052            case 0x09:      /* HT */            HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
2053            OK = TRUE;            OK = TRUE;
2054            break;            break;
2055
# Line 1797  for (;;) Line 2065  for (;;)
2065              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2066              next_active_state--;              next_active_state--;
2067              }              }
2068            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2069              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2070            else            else
2072            }            }
# Line 1817  for (;;) Line 2085  for (;;)
2085        break;        break;
2086
2087        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2088        case OP_CHARNC:        case OP_CHARI:
2089        if (clen == 0) break;        if (clen == 0) break;
2090
2091  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2092        if (utf8)        if (utf)
2093          {          {
2094          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2095            {            {
2096            unsigned int othercase;            unsigned int othercase;
2097            if (c < 128) othercase = fcc[c]; else            if (c < 128)
2098                othercase = fcc[c];
2099            /* If we have Unicode property support, we can use it to test the            else
2100            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2101                other case of the character. */
2102  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2103            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2104  #else  #else
2105            othercase = NOTACHAR;              othercase = NOTACHAR;
2106  #endif  #endif
2107
2108            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2109            }            }
2110          }          }
2111        else        else
2112  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2113          /* Not UTF mode */
/* Non-UTF-8 mode */
2114          {          {
2115          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2116              { ADD_NEW(state_offset + 2, 0); }
2117          }          }
2118        break;        break;
2119
# Line 1857  for (;;) Line 2125  for (;;)
2125        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2126
2127        case OP_EXTUNI:        case OP_EXTUNI:
2128        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2129          {          {
2130          const uschar *nptr = ptr + clen;          int lgb, rgb;
2131            const pcre_uchar *nptr = ptr + clen;
2132          int ncount = 0;          int ncount = 0;
2133            lgb = UCD_GRAPHBREAK(c);
2134          while (nptr < end_subject)          while (nptr < end_subject)
2135            {            {
2136            int nclen = 1;            dlen = 1;
2137            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2138            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2139              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2140            ncount++;            ncount++;
2141            nptr += nclen;            lgb = rgb;
2142              nptr += dlen;
2143            }            }
2144            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2145                reset_could_continue = TRUE;
2147          }          }
2148        break;        break;
# Line 1882  for (;;) Line 2156  for (;;)
2156        case OP_ANYNL:        case OP_ANYNL:
2157        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2158          {          {
2159          case 0x000b:          case CHAR_VT:
2160          case 0x000c:          case CHAR_FF:
2161          case 0x0085:          case CHAR_NEL:
2162    #ifndef EBCDIC
2163          case 0x2028:          case 0x2028:
2164          case 0x2029:          case 0x2029:
2165    #endif  /* Not EBCDIC */
2166          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2167
2168          case 0x000a:          case CHAR_LF:
2170          break;          break;
2171
2172          case 0x000d:          case CHAR_CR:
2173          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2174              {
2176              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2177                reset_could_continue = TRUE;
2178              }
2179            else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
2180            {            {
2182            }            }
# Line 1910  for (;;) Line 2192  for (;;)
2192        case OP_NOT_VSPACE:        case OP_NOT_VSPACE:
2193        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2194          {          {
2195          case 0x000a:          VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
2196          break;          break;
2197
2198          default:          default:
# Line 1929  for (;;) Line 2205  for (;;)
2205        case OP_VSPACE:        case OP_VSPACE:
2206        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2207          {          {
2208          case 0x000a:          VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
2210          break;          break;
2211
2212          default: break;          default:
2213            break;
2214          }          }
2215        break;        break;
2216
# Line 1947  for (;;) Line 2218  for (;;)
2218        case OP_NOT_HSPACE:        case OP_NOT_HSPACE:
2219        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2220          {          {
2221          case 0x09:      /* HT */          HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
2222          break;          break;
2223
2224          default:          default:
# Line 1978  for (;;) Line 2231  for (;;)
2231        case OP_HSPACE:        case OP_HSPACE:
2232        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2233          {          {
2234          case 0x09:      /* HT */          HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
2236          break;          break;
2237
2238            default:
2239            break;
2240          }          }
2241        break;        break;
2242
2243        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2244        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character casefully. */
characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */
2245
2246        case OP_NOT:        case OP_NOT:
2247          if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2248          break;
2249
2250          /*-----------------------------------------------------------------*/
2251          /* Match a negated single character caselessly. */
2252
2253          case OP_NOTI:
2254        if (clen > 0)        if (clen > 0)
2255          {          {
2256          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd;
2257          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }  #ifdef SUPPORT_UTF
2258            if (utf && d >= 128)
2259              {
2260    #ifdef SUPPORT_UCP
2261              otherd = UCD_OTHERCASE(d);
2262    #endif  /* SUPPORT_UCP */
2263              }
2264            else
2265    #endif  /* SUPPORT_UTF */
2266            otherd = TABLE_GET(d, fcc, d);
2267            if (c != d && c != otherd)
2268              { ADD_NEW(state_offset + dlen + 1, 0); }
2269          }          }
2270        break;        break;
2271
2272        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2273          case OP_PLUSI:
2274          case OP_MINPLUSI:
2275          case OP_POSPLUSI:
2276          case OP_NOTPLUSI:
2277          case OP_NOTMINPLUSI:
2278          case OP_NOTPOSPLUSI:
2279          caseless = TRUE;
2280          codevalue -= OP_STARI - OP_STAR;
2281
2282          /* Fall through */
2283        case OP_PLUS:        case OP_PLUS:
2284        case OP_MINPLUS:        case OP_MINPLUS:
2285        case OP_POSPLUS:        case OP_POSPLUS:
# Line 2026  for (;;) Line 2290  for (;;)
2290        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2291        if (clen > 0)        if (clen > 0)
2292          {          {
2293          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2294          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2295            {            {
2296  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2297            if (utf8 && d >= 128)            if (utf && d >= 128)
2298              {              {
2299  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2300              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2301  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2302              }              }
2303            else            else
2304  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2305            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2306            }            }
2307          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2308            {            {
# Line 2055  for (;;) Line 2319  for (;;)
2319        break;        break;
2320
2321        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2322          case OP_QUERYI:
2323          case OP_MINQUERYI:
2324          case OP_POSQUERYI:
2325          case OP_NOTQUERYI:
2326          case OP_NOTMINQUERYI:
2327          case OP_NOTPOSQUERYI:
2328          caseless = TRUE;
2329          codevalue -= OP_STARI - OP_STAR;
2330          /* Fall through */
2331        case OP_QUERY:        case OP_QUERY:
2332        case OP_MINQUERY:        case OP_MINQUERY:
2333        case OP_POSQUERY:        case OP_POSQUERY:
# Line 2064  for (;;) Line 2337  for (;;)
2337        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2338        if (clen > 0)        if (clen > 0)
2339          {          {
2340          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2341          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2342            {            {
2343  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2344            if (utf8 && d >= 128)            if (utf && d >= 128)
2345              {              {
2346  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2347              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2348  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2349              }              }
2350            else            else
2351  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2352            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2353            }            }
2354          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2355            {            {
# Line 2091  for (;;) Line 2364  for (;;)
2364        break;        break;
2365
2366        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2367          case OP_STARI:
2368          case OP_MINSTARI:
2369          case OP_POSSTARI:
2370          case OP_NOTSTARI:
2371          case OP_NOTMINSTARI:
2372          case OP_NOTPOSSTARI:
2373          caseless = TRUE;
2374          codevalue -= OP_STARI - OP_STAR;
2375          /* Fall through */
2376        case OP_STAR:        case OP_STAR:
2377        case OP_MINSTAR:        case OP_MINSTAR:
2378        case OP_POSSTAR:        case OP_POSSTAR:
# Line 2100  for (;;) Line 2382  for (;;)
2382        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2383        if (clen > 0)        if (clen > 0)
2384          {          {
2385          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2386          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2387            {            {
2388  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2389            if (utf8 && d >= 128)            if (utf && d >= 128)
2390              {              {
2391  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2392              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2393  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2394              }              }
2395            else            else
2396  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2397            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2398            }            }
2399          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2400            {            {
# Line 2127  for (;;) Line 2409  for (;;)
2409        break;        break;
2410
2411        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2412          case OP_EXACTI:
2413          case OP_NOTEXACTI:
2414          caseless = TRUE;
2415          codevalue -= OP_STARI - OP_STAR;
2416          /* Fall through */
2417        case OP_EXACT:        case OP_EXACT:
2418        case OP_NOTEXACT:        case OP_NOTEXACT:
2419        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2420        if (clen > 0)        if (clen > 0)
2421          {          {
2422          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2423          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2424            {            {
2425  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2426            if (utf8 && d >= 128)            if (utf && d >= 128)
2427              {              {
2428  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2429              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2430  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2431              }              }
2432            else            else
2433  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2434            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2435            }            }
2436          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2437            {            {
2438            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2439              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2440            else            else
2442            }            }
# Line 2157  for (;;) Line 2444  for (;;)
2444        break;        break;
2445
2446        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2447          case OP_UPTOI:
2448          case OP_MINUPTOI:
2449          case OP_POSUPTOI:
2450          case OP_NOTUPTOI:
2451          case OP_NOTMINUPTOI:
2452          case OP_NOTPOSUPTOI:
2453          caseless = TRUE;
2454          codevalue -= OP_STARI - OP_STAR;
2455          /* Fall through */
2456        case OP_UPTO:        case OP_UPTO:
2457        case OP_MINUPTO:        case OP_MINUPTO:
2458        case OP_POSUPTO:        case OP_POSUPTO:
2459        case OP_NOTUPTO:        case OP_NOTUPTO:
2460        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2461        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2462        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2463        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2464        if (clen > 0)        if (clen > 0)
2465          {          {
2466          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2467          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2468            {            {
2469  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2470            if (utf8 && d >= 128)            if (utf && d >= 128)
2471              {              {
2472  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2473              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2474  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2475              }              }
2476            else            else
2477  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2478            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2479            }            }
2480          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2481            {            {
# Line 2188  for (;;) Line 2484  for (;;)
2484              active_count--;             /* Remove non-match possibility */              active_count--;             /* Remove non-match possibility */
2485              next_active_state--;              next_active_state--;
2486              }              }
2487            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2488              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2489            else            else
2491            }            }
# Line 2206  for (;;) Line 2502  for (;;)
2502          {          {
2503          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2504          int next_state_offset;          int next_state_offset;
2505          const uschar *ecode;          const pcre_uchar *ecode;
2506
2507          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2508          can set isinclass from it. */          can set isinclass from it. */
2509
2510          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2511            {            {
2512            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2513            if (clen > 0)            if (clen > 0)
2514              {              {
2515              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2516                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2517              }              }
2518            }            }
2519
# Line 2228  for (;;) Line 2524  for (;;)
2524          else          else
2525           {           {
2526           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2527           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2528           }           }
2529
2530          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
2531          points to the byte after the end of the class. If there is a          points to the byte after the end of the class. If there is a
2532          quantifier, this is where it will be. */          quantifier, this is where it will be. */
2533
2534          next_state_offset = ecode - start_code;          next_state_offset = (int)(ecode - start_code);
2535
2536          switch (*ecode)          switch (*ecode)
2537            {            {
# Line 2261  for (;;) Line 2557  for (;;)
2557            case OP_CRRANGE:            case OP_CRRANGE:
2558            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2559            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2560            if (count >= GET2(ecode, 1))            if (count >= (int)GET2(ecode, 1))
2561              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2562            if (isinclass)            if (isinclass)
2563              {              {
2564              int max = GET2(ecode, 3);              int max = (int)GET2(ecode, 1 + IMM2_SIZE);
2565              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2566                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2567              else              else
2569              }              }
# Line 2298  for (;;) Line 2594  for (;;)
2594          int rc;          int rc;
2595          int local_offsets[2];          int local_offsets[2];
2596          int local_workspace[1000];          int local_workspace[1000];
2597          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2598
2599          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2600
# Line 2306  for (;;) Line 2602  for (;;)
2602            md,                                   /* static match data */            md,                                   /* static match data */
2603            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2604            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2605            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2606            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2607            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2608            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2609            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2610            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2611
2612          if (rc == PCRE_ERROR_DFA_UITEM) return rc;          if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2613          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2615          }          }
2616        break;        break;
2617
# Line 2337  for (;;) Line 2631  for (;;)
2632            {            {
2633            rrc = 0;            rrc = 0;
2634            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2635              {              {
2636              pcre_callout_block cb;              PUBL(callout_block) cb;
2637              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2639              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2640    #if defined COMPILE_PCRE8
2641              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2642              cb.subject_length   = end_subject - start_subject;  #elif defined COMPILE_PCRE16
2643              cb.start_match      = current_subject - start_subject;              cb.subject          = (PCRE_SPTR16)start_subject;
2644              cb.current_position = ptr - start_subject;  #elif defined COMPILE_PCRE32
2645                cb.subject          = (PCRE_SPTR32)start_subject;
2646    #endif
2647                cb.subject_length   = (int)(end_subject - start_subject);
2648                cb.start_match      = (int)(current_subject - start_subject);
2649                cb.current_position = (int)(ptr - start_subject);
2650              cb.pattern_position = GET(code, LINK_SIZE + 3);              cb.pattern_position = GET(code, LINK_SIZE + 3);
2651              cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);              cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2652              cb.capture_top      = 1;              cb.capture_top      = 1;
2653              cb.capture_last     = -1;              cb.capture_last     = -1;
2654              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2655              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              cb.mark             = NULL;   /* No (*MARK) support */
2656                if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2657              }              }
2658            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2659            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2660            }            }
2661
# Line 2376  for (;;) Line 2677  for (;;)
2677
2678          else if (condcode == OP_RREF || condcode == OP_NRREF)          else if (condcode == OP_RREF || condcode == OP_NRREF)
2679            {            {
2680            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2681            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2682            if (recursing > 0)            if (md->recursive != NULL)
2685            }            }
2686
# Line 2388  for (;;) Line 2689  for (;;)
2689          else          else
2690            {            {
2691            int rc;            int rc;
2692            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2693            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2694
2695            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2696
# Line 2397  for (;;) Line 2698  for (;;)
2698              md,                                   /* fixed match data */              md,                                   /* fixed match data */
2699              asscode,                              /* this subexpression's code */              asscode,                              /* this subexpression's code */
2700              ptr,                                  /* where we currently are */              ptr,                                  /* where we currently are */
2701              ptr - start_subject,                  /* start offset */              (int)(ptr - start_subject),           /* start offset */
2702              local_offsets,                        /* offset vector */              local_offsets,                        /* offset vector */
2703              sizeof(local_offsets)/sizeof(int),    /* size of same */              sizeof(local_offsets)/sizeof(int),    /* size of same */
2704              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2705              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2706              ims,                                  /* the current ims flags */              rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2707
2708            if (rc == PCRE_ERROR_DFA_UITEM) return rc;            if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2709            if ((rc >= 0) ==            if ((rc >= 0) ==
2710                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2712            else            else
2714            }            }
# Line 2419  for (;;) Line 2718  for (;;)
2718        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2719        case OP_RECURSE:        case OP_RECURSE:
2720          {          {
2721            dfa_recursion_info *ri;
2722          int local_offsets[1000];          int local_offsets[1000];
2723          int local_workspace[1000];          int local_workspace[1000];
2724            const pcre_uchar *callpat = start_code + GET(code, 1);
2725            int recno = (callpat == md->start_code)? 0 :
2727          int rc;          int rc;
2728
2729          DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2730            recursing + 1));
2731            /* Check for repeating a recursion without advancing the subject
2732            pointer. This should catch convoluted mutual recursions. (Some simple
2733            cases are caught at compile time.) */
2734
2735            for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2736              if (recno == ri->group_num && ptr == ri->subject_position)
2737                return PCRE_ERROR_RECURSELOOP;
2738
2739            /* Remember this recursion and where we started it so as to
2740            catch infinite loops. */
2741
2742            new_recursive.group_num = recno;
2743            new_recursive.subject_position = ptr;
2744            new_recursive.prevrec = md->recursive;
2745            md->recursive = &new_recursive;
2746
2747          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2748            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2749            start_code + GET(code, 1),            /* this subexpression's code */            callpat,                              /* this subexpression's code */
2750            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2751            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2752            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2753            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2754            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2755            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2756            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing + 1);                       /* regex recurse level */
2757
2758          DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2759            recursing + 1, rc));
2760            DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2761              rc));
2762
2763          /* Ran out of internal offsets */          /* Ran out of internal offsets */
2764
# Line 2454  for (;;) Line 2772  for (;;)
2772            {            {
2773            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2774              {              {
const uschar *p = start_subject + local_offsets[rc];
const uschar *pp = start_subject + local_offsets[rc+1];
2775              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2776              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2777                if (utf)
2778                  {
2779                  const pcre_uchar *p = start_subject + local_offsets[rc];
2780                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2781                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2782                  }
2783    #endif
2784              if (charcount > 0)              if (charcount > 0)
2785                {                {
# Line 2473  for (;;) Line 2796  for (;;)
2796        break;        break;
2797
2798        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2799          case OP_BRAPOS:
2800          case OP_SBRAPOS:
2801          case OP_CBRAPOS:
2802          case OP_SCBRAPOS:
2803          case OP_BRAPOSZERO:
2804            {
2805            int charcount, matched_count;
2806            const pcre_uchar *local_ptr = ptr;
2807            BOOL allow_zero;
2808
2809            if (codevalue == OP_BRAPOSZERO)
2810              {
2811              allow_zero = TRUE;
2812              codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2813              }
2814            else allow_zero = FALSE;
2815
2816            /* Loop to match the subpattern as many times as possible as if it were
2817            a complete pattern. */
2818
2819            for (matched_count = 0;; matched_count++)
2820              {
2821              int local_offsets[2];
2822              int local_workspace[1000];
2823
2824              int rc = internal_dfa_exec(
2825                md,                                   /* fixed match data */
2826                code,                                 /* this subexpression's code */
2827                local_ptr,                            /* where we currently are */
2828                (int)(ptr - start_subject),           /* start offset */
2829                local_offsets,                        /* offset vector */
2830                sizeof(local_offsets)/sizeof(int),    /* size of same */
2831                local_workspace,                      /* workspace vector */
2832                sizeof(local_workspace)/sizeof(int),  /* size of same */
2833                rlevel);                              /* function recursion level */
2834
2835              /* Failed to match */
2836
2837              if (rc < 0)
2838                {
2839                if (rc != PCRE_ERROR_NOMATCH) return rc;
2840                break;
2841                }
2842
2843              /* Matched: break the loop if zero characters matched. */
2844
2845              charcount = local_offsets[1] - local_offsets[0];
2846              if (charcount == 0) break;
2847              local_ptr += charcount;    /* Advance temporary position ptr */
2848              }
2849
2850            /* At this point we have matched the subpattern matched_count
2851            times, and local_ptr is pointing to the character after the end of the
2852            last match. */
2853
2854            if (matched_count > 0 || allow_zero)
2855              {
2856              const pcre_uchar *end_subpattern = code;
2857              int next_state_offset;
2858
2859              do { end_subpattern += GET(end_subpattern, 1); }
2860                while (*end_subpattern == OP_ALT);
2861              next_state_offset =
2862                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2863
2864              /* Optimization: if there are no more active states, and there
2865              are no new states yet set up, then skip over the subject string
2866              right here, to save looping. Otherwise, set up the new state to swing
2867              into action when the end of the matched substring is reached. */
2868
2869              if (i + 1 >= active_count && new_count == 0)
2870                {
2871                ptr = local_ptr;
2872                clen = 0;
2874                }
2875              else
2876                {
2877                const pcre_uchar *p = ptr;
2878                const pcre_uchar *pp = local_ptr;
2879                charcount = (int)(pp - p);
2880    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2881                if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2882    #endif
2883                ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2884                }
2885              }
2886            }
2887          break;
2888
2889          /*-----------------------------------------------------------------*/
2890        case OP_ONCE:        case OP_ONCE:
2891          case OP_ONCE_NC:
2892          {          {
2893          int local_offsets[2];          int local_offsets[2];
2894          int local_workspace[1000];          int local_workspace[1000];
# Line 2482  for (;;) Line 2897  for (;;)
2897            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2898            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2899            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2900            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2901            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2902            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2903            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2904            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2905            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2906
2907          if (rc >= 0)          if (rc >= 0)
2908            {            {
2909            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2910            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2911            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2912
2913            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2914              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
2915            next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;            next_state_offset =
2916                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2917
2918            /* If the end of this subpattern is KETRMAX or KETRMIN, we must            /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2919            arrange for the repeat state also to be added to the relevant list.            arrange for the repeat state also to be added to the relevant list.
# Line 2507  for (;;) Line 2921  for (;;)
2921
2922            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2923                                   *end_subpattern == OP_KETRMIN)?                                   *end_subpattern == OP_KETRMIN)?
2924              end_subpattern - start_code - GET(end_subpattern, 1) : -1;              (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
2925
2926            /* If we have matched an empty string, add the next state at the            /* If we have matched an empty string, add the next state at the
2927            current character pointer. This is important so that the duplicate            current character pointer. This is important so that the duplicate
# Line 2522  for (;;) Line 2936  for (;;)
2936            /* Optimization: if there are no more active states, and there            /* Optimization: if there are no more active states, and there
2937            are no new states yet set up, then skip over the subject string            are no new states yet set up, then skip over the subject string
2938            right here, to save looping. Otherwise, set up the new state to swing            right here, to save looping. Otherwise, set up the new state to swing
2939            into action when the end of the substring is reached. */            into action when the end of the matched substring is reached. */
2940
2941            else if (i + 1 >= active_count && new_count == 0)            else if (i + 1 >= active_count && new_count == 0)
2942              {              {
# Line 2545  for (;;) Line 2959  for (;;)
2959              }              }
2960            else            else
2961              {              {
2962              const uschar *p = start_subject + local_offsets[0];  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2963              const uschar *pp = start_subject + local_offsets[1];              if (utf)
2964              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;                {
2965                  const pcre_uchar *p = start_subject + local_offsets[0];
2966                  const pcre_uchar *pp = start_subject + local_offsets[1];
2967                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2968                  }
2969    #endif
2971              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
2972                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
2973              }              }

2974            }            }
2975          else if (rc != PCRE_ERROR_NOMATCH) return rc;          else if (rc != PCRE_ERROR_NOMATCH) return rc;
2976          }          }
# Line 2564  for (;;) Line 2982  for (;;)
2982
2983        case OP_CALLOUT:        case OP_CALLOUT:
2984        rrc = 0;        rrc = 0;
2985        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
2986          {          {
2987          pcre_callout_block cb;          PUBL(callout_block) cb;
2988          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2989          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2990          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2991    #if defined COMPILE_PCRE8
2992          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2993          cb.subject_length   = end_subject - start_subject;  #elif defined COMPILE_PCRE16
2994          cb.start_match      = current_subject - start_subject;          cb.subject          = (PCRE_SPTR16)start_subject;
2995          cb.current_position = ptr - start_subject;  #elif defined COMPILE_PCRE32
2996            cb.subject          = (PCRE_SPTR32)start_subject;
2997    #endif
2998            cb.subject_length   = (int)(end_subject - start_subject);
2999            cb.start_match      = (int)(current_subject - start_subject);
3000            cb.current_position = (int)(ptr - start_subject);
3001          cb.pattern_position = GET(code, 2);          cb.pattern_position = GET(code, 2);
3002          cb.next_item_length = GET(code, 2 + LINK_SIZE);          cb.next_item_length = GET(code, 2 + LINK_SIZE);
3003          cb.capture_top      = 1;          cb.capture_top      = 1;
3004          cb.capture_last     = -1;          cb.capture_last     = -1;
3005          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
3006          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          cb.mark             = NULL;   /* No (*MARK) support */
3007            if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
3008          }          }
3009        if (rrc == 0)        if (rrc == 0)
3010          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3011        break;        break;
3012
3013
# Line 2611  for (;;) Line 3036  for (;;)
3036    if (new_count <= 0)    if (new_count <= 0)
3037      {      {
3038      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3039          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3040          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3041          (                                            /* either... */          (                                            /* either... */
3042          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2619  for (;;) Line 3044  for (;;)
3044          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3045           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3046          ) &&                                         /* And... */          ) &&                                         /* And... */
3047          ptr >= end_subject &&                     /* Reached end of subject */          (
3048          ptr > current_subject)                    /* Matched non-empty string */          partial_newline ||                           /* Either partial NL */
3049        {            (                                          /* or ... */
3050        if (offsetcount >= 2)            ptr >= end_subject &&                /* End of subject and */
3051          {            ptr > md->start_used_ptr)            /* Inspected non-empty string */
3052          offsets[0] = md->start_used_ptr - start_subject;            )
3053          offsets[1] = end_subject - start_subject;          )
}
3054        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
}

3055      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3056        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
3057        rlevel*2-2, SP));        rlevel*2-2, SP));
# Line 2679  Returns:          > 0 => number of match Line 3101  Returns:          > 0 => number of match
3101                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3102  */  */
3103
3104    #if defined COMPILE_PCRE8
3105  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3106  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3107    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3108    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3109    #elif defined COMPILE_PCRE16
3110    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3111    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3112      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3113      int offsetcount, int *workspace, int wscount)
3114    #elif defined COMPILE_PCRE32
3115    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3116    pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
3117      PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3118      int offsetcount, int *workspace, int wscount)
3119    #endif
3120  {  {
3121  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3122  dfa_match_data match_block;  dfa_match_data match_block;
3123  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3124  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3125  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;

pcre_study_data internal_study;
3126  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
real_pcre internal_re;
3127
3128  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3129  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3130  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3131  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3132  int first_byte = -1;  pcre_uchar first_char = 0;
3133  int req_byte = -1;  pcre_uchar first_char2 = 0;
3134  int req_byte2 = -1;  pcre_uchar req_char = 0;
3135    pcre_uchar req_char2 = 0;
3136  int newline;  int newline;
3137
3138  /* Plausibility checks */  /* Plausibility checks */
# Line 2710  if (re == NULL || subject == NULL || wor Line 3142  if (re == NULL || subject == NULL || wor
3142     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3143  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3144  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3145    if (length < 0) return PCRE_ERROR_BADLENGTH;
3146    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3147
3148    /* Check that the first field in the block is the magic number. If it is not,
3149    return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3150    REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3151    means that the pattern is likely compiled with different endianness. */
3152
3153    if (re->magic_number != MAGIC_NUMBER)
3154      return re->magic_number == REVERSED_MAGIC_NUMBER?
3155        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3156    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3157
3158  /* We need to find the pointer to any study data before we test for byte  /* If restarting after a partial match, do some sanity checks on the contents
3159  flipping, so we scan the extra_data block first. This may set two fields in the  of the workspace. */
3160  match block, so we must initialize them beforehand. However, the other fields
3161  in the match block must not be set until after the byte flipping. */  if ((options & PCRE_DFA_RESTART) != 0)
3162      {
3163      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3164        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3166      }
3167
3168    /* Set up study, callout, and table data */
3169
3170  md->tables = re->tables;  md->tables = re->tables;
3171  md->callout_data = NULL;  md->callout_data = NULL;
# Line 2733  if (extra_data != NULL) Line 3184  if (extra_data != NULL)
3184      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3185    }    }
3186
/* Check that the first field in the block is the magic number. If it is not,
test for a regex that was compiled on a host of opposite endianness. If this is
the case, flipped values are put in internal_re and internal_study if there was
study data too. */

if (re->magic_number != MAGIC_NUMBER)
{
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
if (re == NULL) return PCRE_ERROR_BADMAGIC;
if (study != NULL) study = &internal_study;
}

3187  /* Set some local values */  /* Set some local values */
3188
3189  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3190  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3191  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3192
3193  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3194  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
3195    utf = (re->options & PCRE_UTF8) != 0;
3196  #else  #else
3197  utf8 = FALSE;  utf = FALSE;
3198  #endif  #endif
3199
3200  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 2762  anchored = (options & (PCRE_ANCHORED|PCR Line 3202  anchored = (options & (PCRE_ANCHORED|PCR
3202
3203  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3204
3205  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3206      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3207  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3208  md->end_subject = end_subject;  md->end_subject = end_subject;
3209  md->start_offset = start_offset;  md->start_offset = start_offset;
3210  md->moptions = options;  md->moptions = options;
# Line 2825  else Line 3265  else
3265  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3266  back the character offset. */  back the character offset. */
3267
3268  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3269  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3270    {    {
3271    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    int erroroffset;
3272      return PCRE_ERROR_BADUTF8;    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3273    if (start_offset > 0 && start_offset < length)    if (errorcode != 0)
3274      {      {
3275      int tb = ((uschar *)subject)[start_offset];      if (offsetcount >= 2)
if (tb > 127)
3276        {        {
3277        tb &= 0xc0;        offsets[0] = erroroffset;
3278        if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;        offsets[1] = errorcode;
3279        }        }
3280    #if defined COMPILE_PCRE8
3281        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
3283    #elif defined COMPILE_PCRE16
3284        return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
3286    #elif defined COMPILE_PCRE32
3288    #endif
3289      }      }
3290    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3291      if (start_offset > 0 && start_offset < length &&
3292            NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3293        return PCRE_ERROR_BADUTF8_OFFSET;
3294    #endif
3295    }    }
3296  #endif  #endif
3297
# Line 2846  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3299  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3299  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3300  in other programs later. */  in other programs later. */
3301
3302  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3303
3304  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3305  used in a loop when finding where to start. */  where to start. */
3306
lcc = md->tables + lcc_offset;
3307  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3308  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3309
# Line 2865  if (!anchored) Line 3317  if (!anchored)
3317    {    {
3318    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3319      {      {
3320      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3321      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
3322        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3323          {
3324          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3325    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3326          if (utf && first_char > 127)
3327            first_char2 = UCD_OTHERCASE(first_char);
3328    #endif
3329          }
3330      }      }
3331    else    else
3332      {      {
# Line 2882  character" set. */ Line 3341  character" set. */
3341
3342  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3343    {    {
3344    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3345    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3346    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3347        {
3348        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3349    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3350        if (utf && req_char > 127)
3351          req_char2 = UCD_OTHERCASE(req_char);
3352    #endif
3353        }
3354    }    }
3355
3356  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 2897  for (;;) Line 3363  for (;;)
3363
3364    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3365      {      {
3366      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3367
3368      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3369      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 2906  for (;;) Line 3372  for (;;)
3372
3373      if (firstline)      if (firstline)
3374        {        {
3375        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3376  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3377        if (utf8)        if (utf)
3378          {          {
3379          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3380            {            {
3381            t++;            t++;
3382            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3383            }            }
3384          }          }
3385        else        else
# Line 2924  for (;;) Line 3390  for (;;)
3390
3391      /* There are some optimizations that avoid running the match if a known      /* There are some optimizations that avoid running the match if a known
3392      starting point is not found. However, there is an option that disables      starting point is not found. However, there is an option that disables
3393      these, for testing and for ensuring that all callouts do actually occur. */      these, for testing and for ensuring that all callouts do actually occur.
3394        The option can be set in the regex by (*NO_START_OPT) or passed in
3395        match-time options. */
3396
3397      if ((options & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3398        {        {
3399        /* Advance to a known first byte. */        /* Advance to a known first char. */
3400
3401        if (first_byte >= 0)        if (has_first_char)
3402          {          {
3403          if (first_byte_caseless)          if (first_char != first_char2)
3404              {
3405              pcre_uchar csc;
3406            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3407                   lcc[*current_subject] != first_byte)                   (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
3408              current_subject++;              current_subject++;
3409              }
3410          else          else
3411            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3412                   *current_subject != first_byte)                   RAWUCHARTEST(current_subject) != first_char)
3413              current_subject++;              current_subject++;
3414          }          }
3415
# Line 2948  for (;;) Line 3419  for (;;)
3419          {          {
3420          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3421            {            {
3422  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3423            if (utf8)            if (utf)
3424              {              {
3425              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3426                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3427                {                {
3428                current_subject++;                current_subject++;
3429                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3430                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
current_subject++;
3431                }                }
3432              }              }
3433            else            else
# Line 2969  for (;;) Line 3439  for (;;)
3439            ANYCRLF, and we are now at a LF, advance the match position by one            ANYCRLF, and we are now at a LF, advance the match position by one
3440            more character. */            more character. */
3441
3442            if (current_subject[-1] == CHAR_CR &&            if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3443                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3444                 current_subject < end_subject &&                 current_subject < end_subject &&
3445                 *current_subject == CHAR_NL)                 RAWUCHARTEST(current_subject) == CHAR_NL)
3446              current_subject++;              current_subject++;
3447            }            }
3448          }          }
# Line 2983  for (;;) Line 3453  for (;;)
3453          {          {
3454          while (current_subject < end_subject)          while (current_subject < end_subject)
3455            {            {
3456            register unsigned int c = *current_subject;            register pcre_uint32 c = RAWUCHARTEST(current_subject);
3457            if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;  #ifndef COMPILE_PCRE8
3458              else break;            if (c > 255) c = 255;
3459    #endif
3460              if ((start_bits[c/8] & (1 << (c&7))) == 0)
3461                {
3462                current_subject++;
3463    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3464                /* In non 8-bit mode, the iteration will stop for
3465                characters > 255 at the beginning or not stop at all. */
3466                if (utf)
3467                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3468                    current_subject++);
3469    #endif
3470                }
3471              else break;
3472            }            }
3473          }          }
3474        }        }
# Line 2998  for (;;) Line 3481  for (;;)
3481      disabling is explicitly requested (and of course, by the test above, this      disabling is explicitly requested (and of course, by the test above, this
3482      code is not obeyed when restarting after a partial match). */      code is not obeyed when restarting after a partial match). */
3483
3484      if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3485          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3486        {        {
3487        /* If the pattern was studied, a minimum subject length may be set. This        /* If the pattern was studied, a minimum subject length may be set. This
# Line 3010  for (;;) Line 3493  for (;;)
3493            (pcre_uint32)(end_subject - current_subject) < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3494          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3495
3496        /* If req_byte is set, we know that that character must appear in the        /* If req_char is set, we know that that character must appear in the
3497        subject for the match to succeed. If the first character is set, req_byte        subject for the match to succeed. If the first character is set, req_char
3498        must be later in the subject; otherwise the test starts at the match        must be later in the subject; otherwise the test starts at the match
3499        point. This optimization can save a huge amount of work in patterns with        point. This optimization can save a huge amount of work in patterns with
3500        nested unlimited repeats that aren't going to match. Writing separate        nested unlimited repeats that aren't going to match. Writing separate
# Line 3023  for (;;) Line 3506  for (;;)
3506        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3507        string... so we don't do this when the string is sufficiently long. */        string... so we don't do this when the string is sufficiently long. */
3508
3509        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3510          {          {
3511          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3512
3513          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3514          place we found it at last time. */          place we found it at last time. */
3515
3516          if (p > req_byte_ptr)          if (p > req_char_ptr)
3517            {            {
3518            if (req_byte_caseless)            if (req_char != req_char2)
3519              {              {
3520              while (p < end_subject)              while (p < end_subject)
3521                {                {
3522                register int pp = *p++;                register pcre_uint32 pp = RAWUCHARINCTEST(p);
3523                if (pp == req_byte || pp == req_byte2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3524                }                }
3525              }              }
3526            else            else
3527              {              {
3528              while (p < end_subject)              while (p < end_subject)
3529                {                {
3530                if (*p++ == req_byte) { p--; break; }                if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
3531                }                }
3532              }              }
3533
# Line 3057  for (;;) Line 3540  for (;;)
3540            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3541            the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3542
3543            req_byte_ptr = p;            req_char_ptr = p;
3544            }            }
3545          }          }
3546        }        }
# Line 3066  for (;;) Line 3549  for (;;)
3549    /* OK, now we can do the business */    /* OK, now we can do the business */
3550
3551    md->start_used_ptr = current_subject;    md->start_used_ptr = current_subject;
3552      md->recursive = NULL;
3553
3554    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3555      md,                                /* fixed match data */      md,                                /* fixed match data */
# Line 3076  for (;;) Line 3560  for (;;)
3560      offsetcount,                       /* size of same */      offsetcount,                       /* size of same */
3561      workspace,                         /* workspace vector */      workspace,                         /* workspace vector */
3562      wscount,                           /* size of same */      wscount,                           /* size of same */
3563      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      0);                                /* function recurse level */
0,                                 /* function recurse level */
0);                                /* regex recurse level */
3564
3565    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3566    on only if not anchored. */    on only if not anchored. */
3567
3568    if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;    if (rc != PCRE_ERROR_NOMATCH || anchored)
3569        {
3570        if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
3571          {
3572          offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
3573          offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
3574          if (offsetcount > 2)
3575            offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
3576          }
3577        return rc;
3578        }
3579
3580    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
3581    and firstline is set. */    and firstline is set. */
3582
3583    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3584    current_subject++;    current_subject++;
3585    if (utf8)  #ifdef SUPPORT_UTF
3586      if (utf)
3587      {      {
3588      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3589        current_subject++;        current_subject++);
3590      }      }
3591    #endif
3592    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3593
3594    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does
3595    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
3596    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
3597
3598    if (current_subject[-1] == CHAR_CR &&    if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3599        current_subject < end_subject &&        current_subject < end_subject &&
3600        *current_subject == CHAR_NL &&        RAWUCHARTEST(current_subject) == CHAR_NL &&
3601        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
3602          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
3603           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||

Legend:
 Removed from v.510 changed lines Added in v.1364