# Diff of /code/trunk/pcre_dfa_exec.c

revision 518 by ph10, Tue May 18 15:47:01 2010 UTC revision 922 by ph10, Mon Feb 20 18:44:42 2012 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
11
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 113  small value. Non-zero values in the tabl Line 113  small value. Non-zero values in the tabl
113  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
114  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
115
116  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
117    0,                             /* End                                    */    0,                             /* End                                    */
118    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
119    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 121  static const uschar coptable[] = { Line 121  static const uschar coptable[] = {
121    0, 0,                          /* \P, \p                                 */    0, 0,                          /* \P, \p                                 */
122    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
123    0,                             /* \X                                     */    0,                             /* \X                                     */
124    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, \$                      */    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, \$, \$M                   */
125    1,                             /* Char                                   */    1,                             /* Char                                   */
126    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
127    1,                             /* not                                    */    1,                             /* not                                    */
128      1,                             /* noti                                   */
129    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
130    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
131    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
132    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
133      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
134      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
135      1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
136      1+IMM2_SIZE,                   /* exact I                                */
137      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
138    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
139    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
140    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
141    1, 1, 1, 3,                    /* NOT *+, ++, ?+, updo+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
142      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
143      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
144      1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
145      1+IMM2_SIZE,                   /* NOT exact I                            */
146      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
147    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
148    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
149    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
150    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
151      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
152    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
153    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
154    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 144  static const uschar coptable[] = { Line 156  static const uschar coptable[] = {
156    0,                             /* NCLASS                                 */    0,                             /* NCLASS                                 */
157    0,                             /* XCLASS - variable length               */    0,                             /* XCLASS - variable length               */
158    0,                             /* REF                                    */    0,                             /* REF                                    */
159      0,                             /* REFI                                   */
160    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
161    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
162    0,                             /* Alt                                    */    0,                             /* Alt                                    */
163    0,                             /* Ket                                    */    0,                             /* Ket                                    */
164    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
165    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
166      0,                             /* KetRpos                                */
167      0,                             /* Reverse                                */
168    0,                             /* Assert                                 */    0,                             /* Assert                                 */
169    0,                             /* Assert not                             */    0,                             /* Assert not                             */
170    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
171    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
172    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
173    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
174    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
175    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
176    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
177    0,                             /* DEF                                    */    0,                             /* DEF                                    */
178    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
179    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
180    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
181    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
182      0, 0                           /* CLOSE, SKIPZERO  */
183  };  };
184
185  /* This table identifies those opcodes that inspect a character. It is used to  /* This table identifies those opcodes that inspect a character. It is used to
# Line 171  remember the fact that a character could Line 187  remember the fact that a character could
187  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
188  two tables that follow must also be modified. */  two tables that follow must also be modified. */
189
190  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
191    0,                             /* End                                    */    0,                             /* End                                    */
192    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
193    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
# Line 179  static const uschar poptable[] = { Line 195  static const uschar poptable[] = {
195    1, 1,                          /* \P, \p                                 */    1, 1,                          /* \P, \p                                 */
196    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
197    1,                             /* \X                                     */    1,                             /* \X                                     */
198    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, \$                      */    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, \$, \$M                   */
199    1,                             /* Char                                   */    1,                             /* Char                                   */
200    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
201    1,                             /* not                                    */    1,                             /* not                                    */
202      1,                             /* noti                                   */
203    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
204    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
205    1, 1, 1,                       /* upto, minupto, exact                   */    1, 1, 1,                       /* upto, minupto, exact                   */
206    1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */    1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
207      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
208      1, 1, 1,                       /* upto I, minupto I, exact I             */
209      1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
210    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
211    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
212    1, 1, 1,                       /* NOT upto, minupto, exact               */    1, 1, 1,                       /* NOT upto, minupto, exact               */
213    1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */    1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
214      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
215      1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
216      1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
217    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
218    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
219    1, 1, 1,                       /* Type upto, minupto, exact              */    1, 1, 1,                       /* Type upto, minupto, exact              */
# Line 202  static const uschar poptable[] = { Line 225  static const uschar poptable[] = {
225    1,                             /* NCLASS                                 */    1,                             /* NCLASS                                 */
226    1,                             /* XCLASS - variable length               */    1,                             /* XCLASS - variable length               */
227    0,                             /* REF                                    */    0,                             /* REF                                    */
228      0,                             /* REFI                                   */
229    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
230    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
231    0,                             /* Alt                                    */    0,                             /* Alt                                    */
232    0,                             /* Ket                                    */    0,                             /* Ket                                    */
233    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
234    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
235      0,                             /* KetRpos                                */
236      0,                             /* Reverse                                */
237    0,                             /* Assert                                 */    0,                             /* Assert                                 */
238    0,                             /* Assert not                             */    0,                             /* Assert not                             */
239    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
240    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
241    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
242    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
243    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
244    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
245    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
246    0,                             /* DEF                                    */    0,                             /* DEF                                    */
247    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
248    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
249    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
250    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
251      0, 0                           /* CLOSE, SKIPZERO                        */
252  };  };
253
254  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
255  and \w */  and \w */
256
257  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
258    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
259    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
260    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 235  static const uschar toptable1[] = { Line 262  static const uschar toptable1[] = {
262    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
263  };  };
264
265  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
266    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
267    ctype_digit, 0,    ctype_digit, 0,
268    ctype_space, 0,    ctype_space, 0,
# Line 252  these structures in, is a vector of ints Line 279  these structures in, is a vector of ints
279  typedef struct stateblock {  typedef struct stateblock {
280    int offset;                     /* Offset to opcode */    int offset;                     /* Offset to opcode */
281    int count;                      /* Count for repeats */    int count;                      /* Count for repeats */
int ims;                        /* ims flag bits */
282    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
283  } stateblock;  } stateblock;
284
# Line 275  Returns:       nothing Line 301  Returns:       nothing
301  */  */
302
303  static void  static void
304  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
305  {  {
306  int c;  int c;
307  while (length-- > 0)  while (length-- > 0)
# Line 308  Arguments: Line 334  Arguments:
334    offsetcount       size of same    offsetcount       size of same
335    workspace         vector of workspace    workspace         vector of workspace
336    wscount           size of same    wscount           size of same
ims               the current ims flags
337    rlevel            function call recursion level    rlevel            function call recursion level
recursing         regex recursive call level
338
339  Returns:            > 0 => number of match offset pairs placed in offsets  Returns:            > 0 => number of match offset pairs placed in offsets
340                      = 0 => offsets overflowed; longest matches are present                      = 0 => offsets overflowed; longest matches are present
# Line 325  for the current character, one for the f Line 349  for the current character, one for the f
349      { \      { \
350      next_active_state->offset = (x); \      next_active_state->offset = (x); \
351      next_active_state->count  = (y); \      next_active_state->count  = (y); \
next_active_state->ims    = ims; \
352      next_active_state++; \      next_active_state++; \
353      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
354      } \      } \
# Line 336  for the current character, one for the f Line 359  for the current character, one for the f
359      { \      { \
360      next_active_state->offset = (x); \      next_active_state->offset = (x); \
361      next_active_state->count  = (y); \      next_active_state->count  = (y); \
next_active_state->ims    = ims; \
362      next_active_state->data   = (z); \      next_active_state->data   = (z); \
363      next_active_state++; \      next_active_state++; \
364      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
# Line 348  for the current character, one for the f Line 370  for the current character, one for the f
370      { \      { \
371      next_new_state->offset = (x); \      next_new_state->offset = (x); \
372      next_new_state->count  = (y); \      next_new_state->count  = (y); \
next_new_state->ims    = ims; \
373      next_new_state++; \      next_new_state++; \
374      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
375      } \      } \
# Line 359  for the current character, one for the f Line 380  for the current character, one for the f
380      { \      { \
381      next_new_state->offset = (x); \      next_new_state->offset = (x); \
382      next_new_state->count  = (y); \      next_new_state->count  = (y); \
next_new_state->ims    = ims; \
383      next_new_state->data   = (z); \      next_new_state->data   = (z); \
384      next_new_state++; \      next_new_state++; \
385      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
# Line 371  for the current character, one for the f Line 391  for the current character, one for the f
391  static int  static int
392  internal_dfa_exec(  internal_dfa_exec(
393    dfa_match_data *md,    dfa_match_data *md,
394    const uschar *this_start_code,    const pcre_uchar *this_start_code,
395    const uschar *current_subject,    const pcre_uchar *current_subject,
396    int start_offset,    int start_offset,
397    int *offsets,    int *offsets,
398    int offsetcount,    int offsetcount,
399    int *workspace,    int *workspace,
400    int wscount,    int wscount,
401    int ims,    int  rlevel)
int  rlevel,
int  recursing)
402  {  {
403  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
404  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
405
406  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
407  const uschar *ptr;  const pcre_uchar *ptr;
408  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
409
410    dfa_recursion_info new_recursive;
411
412  int active_count, new_count, match_count;  int active_count, new_count, match_count;
413
414  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
415  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
416
417  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
418  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
419  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
420
421  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
422  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
423  #else  #else
424  BOOL utf8 = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426
427    BOOL reset_could_continue = FALSE;
428
429  rlevel++;  rlevel++;
430  offsetcount &= (-2);  offsetcount &= (-2);
431
# Line 412  wscount = (wscount - (wscount % (INTS_PE Line 434  wscount = (wscount - (wscount % (INTS_PE
434            (2 * INTS_PER_STATEBLOCK);            (2 * INTS_PER_STATEBLOCK);
435
436  DPRINTF(("\n%.*s---------------------\n"  DPRINTF(("\n%.*s---------------------\n"
437    "%.*sCall to internal_dfa_exec f=%d r=%d\n",    "%.*sCall to internal_dfa_exec f=%d\n",
438    rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));    rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
439
440  ctypes = md->tables + ctypes_offset;  ctypes = md->tables + ctypes_offset;
441  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
# Line 426  next_new_state = new_states = active_sta Line 448  next_new_state = new_states = active_sta
448  new_count = 0;  new_count = 0;
449
450  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
451    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
452        *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
453        ? IMM2_SIZE:0);
454
455  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
456  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 454  if (*first_op == OP_REVERSE) Line 478  if (*first_op == OP_REVERSE)
478    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
479    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
480
481  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
482    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
483
484    if (utf8)    if (utf)
485      {      {
486      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
487        {        {
488        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
489        current_subject--;        current_subject--;
490        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
(*current_subject & 0xc0) == 0x80)
current_subject--;
491        }        }
492      }      }
493    else    else
# Line 475  if (*first_op == OP_REVERSE) Line 497  if (*first_op == OP_REVERSE)
497
498      {      {
499      gone_back = (current_subject - max_back < start_subject)?      gone_back = (current_subject - max_back < start_subject)?
500        current_subject - start_subject : max_back;        (int)(current_subject - start_subject) : max_back;
501      current_subject -= gone_back;      current_subject -= gone_back;
502      }      }
503
# Line 492  if (*first_op == OP_REVERSE) Line 514  if (*first_op == OP_REVERSE)
515      if (back <= gone_back)      if (back <= gone_back)
516        {        {
517        int bstate = end_code - start_code + 2 + 2*LINK_SIZE;        int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
519        }        }
520      end_code += GET(end_code, 1);      end_code += GET(end_code, 1);
# Line 525  else Line 547  else
547    else    else
548      {      {
549      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
550        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
551            *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
552            ? IMM2_SIZE:0);
553      do      do
554        {        {
555        ADD_NEW(end_code - start_code + length, 0);        ADD_NEW((int)(end_code - start_code + length), 0);
556        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
558        }        }
# Line 538  else Line 562  else
562
563  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
564
565  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
566
567  /* Loop for scanning the subject */  /* Loop for scanning the subject */
568
# Line 549  for (;;) Line 573  for (;;)
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    unsigned int c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
577      BOOL could_continue = reset_could_continue;
578      reset_could_continue = FALSE;
579
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
582
# Line 565  for (;;) Line 591  for (;;)
591
592  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
593    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
594    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
595    printf("\"\n");    printf("\"\n");
596
597    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 586  for (;;) Line 612  for (;;)
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of bytes in the character */
615  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
616      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
617  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
618      c = *ptr;      c = *ptr;
619      }      }
620    else    else
# Line 605  for (;;) Line 631  for (;;)
631    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
632      {      {
633      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
634      const uschar *code;      BOOL caseless = FALSE;
635        const pcre_uchar *code;
636      int state_offset = current_state->offset;      int state_offset = current_state->offset;
637      int count, codevalue, rrc;      int count, codevalue, rrc;
638
# Line 616  for (;;) Line 643  for (;;)
643          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
644  #endif  #endif
645
/* This variable is referred to implicitly in the ADD_xxx macros. */

ims = current_state->ims;

646      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
647      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
648      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
649        state, arrange for it to be passed on. */
650
651      if (state_offset < 0)      if (state_offset < 0)
652        {        {
# Line 631  for (;;) Line 655  for (;;)
655          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
657            current_state->data - 1);            current_state->data - 1);
658            if (could_continue) reset_could_continue = TRUE;
659          continue;          continue;
660          }          }
661        else        else
# Line 678  for (;;) Line 703  for (;;)
703      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
704        {        {
705        dlen = 1;        dlen = 1;
706  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
707        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
708  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
709        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
710        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
711          {          {
# Line 725  for (;;) Line 750  for (;;)
750
751  /* ========================================================================== */  /* ========================================================================== */
752        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
753        on with the next opcode. Otherwise, unless we have an empty string and        on with the next opcode. For repeating opcodes, also add the repeat
754          state. Note that KETRPOS will always be encountered at the end of the
755          subpattern, because the possessive subpattern repeats are always handled
756          using recursive calls. Thus, it never adds any new states.
757
758          At the end of the (sub)pattern, unless we have an empty string and
759        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
760        start of the subject, save the match data, shifting up all previous        start of the subject, save the match data, shifting up all previous
761        matches so we always have the longest first. */        matches so we always have the longest first. */
# Line 733  for (;;) Line 763  for (;;)
763        case OP_KET:        case OP_KET:
764        case OP_KETRMIN:        case OP_KETRMIN:
765        case OP_KETRMAX:        case OP_KETRMAX:
766          case OP_KETRPOS:
767        if (code != end_code)        if (code != end_code)
768          {          {
# Line 749  for (;;) Line 780  for (;;)
780                  current_subject > start_subject + md->start_offset)))                  current_subject > start_subject + md->start_offset)))
781            {            {
782            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
783              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
784                match_count = 0;                match_count = 0;
785            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
786            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
787            if (offsetcount >= 2)            if (offsetcount >= 2)
788              {              {
789              offsets[0] = current_subject - start_subject;              offsets[0] = (int)(current_subject - start_subject);
790              offsets[1] = ptr - start_subject;              offsets[1] = (int)(ptr - start_subject);
791              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
792                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], current_subject));
793              }              }
# Line 778  for (;;) Line 809  for (;;)
809        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
810        case OP_ALT:        case OP_ALT:
811        do { code += GET(code, 1); } while (*code == OP_ALT);        do { code += GET(code, 1); } while (*code == OP_ALT);
813        break;        break;
814
815        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 786  for (;;) Line 817  for (;;)
817        case OP_SBRA:        case OP_SBRA:
818        do        do
819          {          {
821          code += GET(code, 1);          code += GET(code, 1);
822          }          }
823        while (*code == OP_ALT);        while (*code == OP_ALT);
# Line 795  for (;;) Line 826  for (;;)
826        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
827        case OP_CBRA:        case OP_CBRA:
828        case OP_SCBRA:        case OP_SCBRA:
830        code += GET(code, 1);        code += GET(code, 1);
831        while (*code == OP_ALT)        while (*code == OP_ALT)
832          {          {
834          code += GET(code, 1);          code += GET(code, 1);
835          }          }
836        break;        break;
# Line 810  for (;;) Line 841  for (;;)
842        code += 1 + GET(code, 2);        code += 1 + GET(code, 2);
843        while (*code == OP_ALT) code += GET(code, 1);        while (*code == OP_ALT) code += GET(code, 1);
845        break;        break;
846
847        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
848        case OP_SKIPZERO:        case OP_SKIPZERO:
849        code += 1 + GET(code, 2);        code += 1 + GET(code, 2);
850        while (*code == OP_ALT) code += GET(code, 1);        while (*code == OP_ALT) code += GET(code, 1);
852        break;        break;
853
854        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
855        case OP_CIRC:        case OP_CIRC:
856        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
((ims & PCRE_MULTILINE) != 0 &&
ptr != end_subject &&
WAS_NEWLINE(ptr)))
857          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
858        break;        break;
859
860        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
861        case OP_EOD:        case OP_CIRCM:
862        if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
863              (ptr != end_subject && WAS_NEWLINE(ptr)))
864            { ADD_ACTIVE(state_offset + 1, 0); }
865        break;        break;
866
867        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
868        case OP_OPT:        case OP_EOD:
869        ims = code[1];        if (ptr >= end_subject)
870        ADD_ACTIVE(state_offset + 2, 0);          {
871            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
872              could_continue = TRUE;
873            else { ADD_ACTIVE(state_offset + 1, 0); }
874            }
875        break;        break;
876
877        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 860  for (;;) Line 894  for (;;)
894        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
895        case OP_ANY:        case OP_ANY:
896        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
897          { ADD_NEW(state_offset + 1, 0); }          {
898            if (ptr + 1 >= md->end_subject &&
899                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
900                NLBLOCK->nltype == NLTYPE_FIXED &&
901                NLBLOCK->nllen == 2 &&
902                c == NLBLOCK->nl[0])
903              {
904              could_continue = partial_newline = TRUE;
905              }
906            else
907              {
909              }
910            }
911        break;        break;
912
913        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 871  for (;;) Line 918  for (;;)
918
919        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
920        case OP_EODN:        case OP_EODN:
921        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))        if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
922            could_continue = TRUE;
923          else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
924          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
925        break;        break;
926
# Line 879  for (;;) Line 928  for (;;)
928        case OP_DOLL:        case OP_DOLL:
929        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
930          {          {
931          if (clen == 0 ||          if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
932              could_continue = TRUE;
933            else if (clen == 0 ||
934              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
935                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
936              ))              ))
937            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
938            else if (ptr + 1 >= md->end_subject &&
939                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
940                     NLBLOCK->nltype == NLTYPE_FIXED &&
941                     NLBLOCK->nllen == 2 &&
942                     c == NLBLOCK->nl[0])
943              {
944              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
945                {
946                reset_could_continue = TRUE;
947                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
948                }
949              else could_continue = partial_newline = TRUE;
950              }
951          }          }
952        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))        break;
953
954          /*-----------------------------------------------------------------*/
955          case OP_DOLLM:
956          if ((md->moptions & PCRE_NOTEOL) == 0)
957            {
958            if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
959              could_continue = TRUE;
960            else if (clen == 0 ||
961                ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
962              { ADD_ACTIVE(state_offset + 1, 0); }
963            else if (ptr + 1 >= md->end_subject &&
964                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
965                     NLBLOCK->nltype == NLTYPE_FIXED &&
966                     NLBLOCK->nllen == 2 &&
967                     c == NLBLOCK->nl[0])
968              {
969              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
970                {
971                reset_could_continue = TRUE;
972                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
973                }
974              else could_continue = partial_newline = TRUE;
975              }
976            }
977          else if (IS_NEWLINE(ptr))
978          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
979        break;        break;
980
# Line 916  for (;;) Line 1005  for (;;)
1005
1006          if (ptr > start_subject)          if (ptr > start_subject)
1007            {            {
1008            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1009            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1010  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1011            if (utf8) BACKCHAR(temp);            if (utf) { BACKCHAR(temp); }
1012  #endif  #endif
1013            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
1014  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1015            if ((md->poptions & PCRE_UCP) != 0)            if ((md->poptions & PCRE_UCP) != 0)
1016              {              {
1017              if (d == '_') left_word = TRUE; else              if (d == '_') left_word = TRUE; else
1018                {                {
1019                int cat = UCD_CATEGORY(d);                int cat = UCD_CATEGORY(d);
1020                left_word = (cat == ucp_L || cat == ucp_N);                left_word = (cat == ucp_L || cat == ucp_N);
1021                }                }
1022              }              }
1023            else            else
1024  #endif  #endif
1025            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
1026            }            }
1027          else left_word = FALSE;          else left_word = FALSE;
1028
1029          if (clen > 0)          if (clen > 0)
1030            {            {
1031  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1032            if ((md->poptions & PCRE_UCP) != 0)            if ((md->poptions & PCRE_UCP) != 0)
1033              {              {
1034              if (c == '_') right_word = TRUE; else              if (c == '_') right_word = TRUE; else
1035                {                {
1036                int cat = UCD_CATEGORY(c);                int cat = UCD_CATEGORY(c);
1037                right_word = (cat == ucp_L || cat == ucp_N);                right_word = (cat == ucp_L || cat == ucp_N);
1038                }                }
1039              }              }
1040            else            else
1041  #endif  #endif
1042            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
1043            }            }
1044          else right_word = FALSE;          else right_word = FALSE;
1045
1046          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
# Line 979  for (;;) Line 1068  for (;;)
1068            break;            break;
1069
1070            case PT_LAMP:            case PT_LAMP:
1071            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1072                 prop->chartype == ucp_Lt;                 prop->chartype == ucp_Lt;
1073            break;            break;
1074
1075            case PT_GC:            case PT_GC:
1076            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1077            break;            break;
1078
1079            case PT_PC:            case PT_PC:
# Line 994  for (;;) Line 1083  for (;;)
1083            case PT_SC:            case PT_SC:
1084            OK = prop->script == code[2];            OK = prop->script == code[2];
1085            break;            break;
1086
1087            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1088
1089            case PT_ALNUM:            case PT_ALNUM:
1090            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1091                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1092            break;            break;
1093
1094            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1095            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1096                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1097            break;            break;
1098
1099            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1100            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1101                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1102                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1103            break;            break;
1104
1105            case PT_WORD:            case PT_WORD:
1106            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1107                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1108                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1109            break;            break;
1110
1111            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1112
# Line 1046  for (;;) Line 1135  for (;;)
1135        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1136        if (clen > 0)        if (clen > 0)
1137          {          {
1138          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1139                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1140                NLBLOCK->nltype == NLTYPE_FIXED &&
1141                NLBLOCK->nllen == 2 &&
1142                c == NLBLOCK->nl[0])
1143              {
1144              could_continue = partial_newline = TRUE;
1145              }
1146            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1147              (c < 256 &&              (c < 256 &&
1148                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1149                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1069  for (;;) Line 1166  for (;;)
1167        if (clen > 0)        if (clen > 0)
1168          {          {
1169          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1170                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1171                NLBLOCK->nltype == NLTYPE_FIXED &&
1172                NLBLOCK->nllen == 2 &&
1173                c == NLBLOCK->nl[0])
1174              {
1175              could_continue = partial_newline = TRUE;
1176              }
1177            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1178              (c < 256 &&              (c < 256 &&
1179                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1180                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1091  for (;;) Line 1196  for (;;)
1197        if (clen > 0)        if (clen > 0)
1198          {          {
1199          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1200                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1201                NLBLOCK->nltype == NLTYPE_FIXED &&
1202                NLBLOCK->nllen == 2 &&
1203                c == NLBLOCK->nl[0])
1204              {
1205              could_continue = partial_newline = TRUE;
1206              }
1207            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1208              (c < 256 &&              (c < 256 &&
1209                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1210                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1111  for (;;) Line 1224  for (;;)
1224        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1225        if (clen > 0)        if (clen > 0)
1226          {          {
1227          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1228                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1229                NLBLOCK->nltype == NLTYPE_FIXED &&
1230                NLBLOCK->nllen == 2 &&
1231                c == NLBLOCK->nl[0])
1232              {
1233              could_continue = partial_newline = TRUE;
1234              }
1235            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1236              (c < 256 &&              (c < 256 &&
1237                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1238                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1239            {            {
1240            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1241              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1242            else            else
1244            }            }
# Line 1128  for (;;) Line 1249  for (;;)
1249        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1250        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1251        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1253        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1254        if (clen > 0)        if (clen > 0)
1255          {          {
1256          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1257                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1258                NLBLOCK->nltype == NLTYPE_FIXED &&
1259                NLBLOCK->nllen == 2 &&
1260                c == NLBLOCK->nl[0])
1261              {
1262              could_continue = partial_newline = TRUE;
1263              }
1264            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1265              (c < 256 &&              (c < 256 &&
1266                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1267                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1143  for (;;) Line 1272  for (;;)
1272              next_active_state--;              next_active_state--;
1273              }              }
1274            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1275              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1276            else            else
1278            }            }
# Line 1173  for (;;) Line 1302  for (;;)
1302            break;            break;
1303
1304            case PT_LAMP:            case PT_LAMP:
1305            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1306              prop->chartype == ucp_Lt;              prop->chartype == ucp_Lt;
1307            break;            break;
1308
1309            case PT_GC:            case PT_GC:
1310            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1311            break;            break;
1312
1313            case PT_PC:            case PT_PC:
# Line 1190  for (;;) Line 1319  for (;;)
1319            break;            break;
1320
1321            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1322
1323            case PT_ALNUM:            case PT_ALNUM:
1324            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1325                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1326            break;            break;
1327
1328            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1329            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1330                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1331            break;            break;
1332
1333            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1334            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1335                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1336                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1337            break;            break;
1338
1339            case PT_WORD:            case PT_WORD:
1340            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1341                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1342                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1343            break;            break;
1344
1345            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1346
# Line 1241  for (;;) Line 1370  for (;;)
1370        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1371        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1372          {          {
1373          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1374          int ncount = 0;          int ncount = 0;
1375          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1376            {            {
# Line 1420  for (;;) Line 1549  for (;;)
1549            break;            break;
1550
1551            case PT_LAMP:            case PT_LAMP:
1552            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1553              prop->chartype == ucp_Lt;              prop->chartype == ucp_Lt;
1554            break;            break;
1555
1556            case PT_GC:            case PT_GC:
1557            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1558            break;            break;
1559
1560            case PT_PC:            case PT_PC:
# Line 1435  for (;;) Line 1564  for (;;)
1564            case PT_SC:            case PT_SC:
1565            OK = prop->script == code[3];            OK = prop->script == code[3];
1566            break;            break;
1567
1568            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1569
1570            case PT_ALNUM:            case PT_ALNUM:
1571            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1572                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1573            break;            break;
1574
1575            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1576            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1577                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1578            break;            break;
1579
1580            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1581            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1582                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1583                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1584            break;            break;
1585
1586            case PT_WORD:            case PT_WORD:
1587            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1588                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1589                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1590            break;            break;
1591
1592            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1593
# Line 1497  for (;;) Line 1626  for (;;)
1627        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1628          {          {
1629          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1630          int ncount = 0;          int ncount = 0;
1631          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1632              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1679  for (;;) Line 1808  for (;;)
1808        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1809        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1810        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1811          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1812        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1813        if (clen > 0)        if (clen > 0)
1814          {          {
1815          BOOL OK;          BOOL OK;
1816          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1817          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1818            {            {
1819            case PT_ANY:            case PT_ANY:
1820            OK = TRUE;            OK = TRUE;
1821            break;            break;
1822
1823            case PT_LAMP:            case PT_LAMP:
1824            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1825              prop->chartype == ucp_Lt;              prop->chartype == ucp_Lt;
1826            break;            break;
1827
1828            case PT_GC:            case PT_GC:
1829            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1830            break;            break;
1831
1832            case PT_PC:            case PT_PC:
1833            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1834            break;            break;
1835
1836            case PT_SC:            case PT_SC:
1837            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1838            break;            break;
1839
1840            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1841
1842            case PT_ALNUM:            case PT_ALNUM:
1843            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1844                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1845            break;            break;
1846
1847            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1848            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1849                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1850            break;            break;
1851
1852            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1853            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1854                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1855                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1856            break;            break;
1857
1858            case PT_WORD:            case PT_WORD:
1859            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1860                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1861                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1862            break;            break;
1863
1864            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1865
# Line 1747  for (;;) Line 1876  for (;;)
1876              next_active_state--;              next_active_state--;
1877              }              }
1878            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1879              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1880            else            else
1882            }            }
# Line 1760  for (;;) Line 1889  for (;;)
1889        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1890        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1891        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1892          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1893        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1894        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1895          {          {
1896          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1897          int ncount = 0;          int ncount = 0;
1898          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1899            {            {
# Line 1780  for (;;) Line 1909  for (;;)
1909            ncount++;            ncount++;
1910            nptr += ndlen;            nptr += ndlen;
1911            }            }
1912            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1913                reset_could_continue = TRUE;
1914          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1915            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1916          else          else
1918          }          }
# Line 1794  for (;;) Line 1925  for (;;)
1925        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1926        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1927        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1928          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1929        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1930        if (clen > 0)        if (clen > 0)
1931          {          {
# Line 1821  for (;;) Line 1952  for (;;)
1952              next_active_state--;              next_active_state--;
1953              }              }
1954            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1955              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1956            else            else
1958            break;            break;
# Line 1838  for (;;) Line 1969  for (;;)
1969        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1970        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1971        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1972          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1973        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1974        if (clen > 0)        if (clen > 0)
1975          {          {
# Line 1867  for (;;) Line 1998  for (;;)
1998              next_active_state--;              next_active_state--;
1999              }              }
2000            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2001              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2002            else            else
2004            }            }
# Line 1880  for (;;) Line 2011  for (;;)
2011        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2012        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2013        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2014          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2015        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2016        if (clen > 0)        if (clen > 0)
2017          {          {
# Line 1922  for (;;) Line 2053  for (;;)
2053              next_active_state--;              next_active_state--;
2054              }              }
2055            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2056              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2057            else            else
2059            }            }
# Line 1941  for (;;) Line 2072  for (;;)
2072        break;        break;
2073
2074        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2075        case OP_CHARNC:        case OP_CHARI:
2076        if (clen == 0) break;        if (clen == 0) break;
2077
2078  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2079        if (utf8)        if (utf)
2080          {          {
2081          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2082            {            {
2083            unsigned int othercase;            unsigned int othercase;
2084            if (c < 128) othercase = fcc[c]; else            if (c < 128)
2085                othercase = fcc[c];
2086            /* If we have Unicode property support, we can use it to test the            else
2087            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2088                other case of the character. */
2089  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2090            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2091  #else  #else
2092            othercase = NOTACHAR;              othercase = NOTACHAR;
2093  #endif  #endif
2094
2095            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2096            }            }
2097          }          }
2098        else        else
2099  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2100          /* Not UTF mode */
/* Non-UTF-8 mode */
2101          {          {
2102          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2103              { ADD_NEW(state_offset + 2, 0); }
2104          }          }
2105        break;        break;
2106
# Line 1983  for (;;) Line 2114  for (;;)
2114        case OP_EXTUNI:        case OP_EXTUNI:
2115        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
2116          {          {
2117          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2118          int ncount = 0;          int ncount = 0;
2119          while (nptr < end_subject)          while (nptr < end_subject)
2120            {            {
# Line 1993  for (;;) Line 2124  for (;;)
2124            ncount++;            ncount++;
2125            nptr += nclen;            nptr += nclen;
2126            }            }
2127            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2128                reset_could_continue = TRUE;
2130          }          }
2131        break;        break;
# Line 2018  for (;;) Line 2151  for (;;)
2151          break;          break;
2152
2153          case 0x000d:          case 0x000d:
2154          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2155              {
2157              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2158                reset_could_continue = TRUE;
2159              }
2160            else if (ptr[1] == 0x0a)
2161            {            {
2163            }            }
2164          else          else
2165            {            {
2167            }            }
2168          break;          break;
2169          }          }
2170        break;        break;
# Line 2127  for (;;) Line 2266  for (;;)
2266        break;        break;
2267
2268        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2269        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character casefully. This is only used for
2270        characters, that is, we know that d < 256. The character we are        one-byte characters, that is, we know that d < 256. The character we are
2271        checking (c) can be multibyte. */        checking (c) can be multibyte. */
2272
2273        case OP_NOT:        case OP_NOT:
2274        if (clen > 0)        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2275          {        break;
2276          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
2277          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }        /*-----------------------------------------------------------------*/
2278          }        /* Match a negated single character caselessly. This is only used for
2279          one-byte characters, that is, we know that d < 256. The character we are
2280          checking (c) can be multibyte. */
2281
2282          case OP_NOTI:
2283          if (clen > 0 && c != d && c != fcc[d])
2284            { ADD_NEW(state_offset + dlen + 1, 0); }
2285        break;        break;
2286
2287        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2288          case OP_PLUSI:
2289          case OP_MINPLUSI:
2290          case OP_POSPLUSI:
2291          case OP_NOTPLUSI:
2292          case OP_NOTMINPLUSI:
2293          case OP_NOTPOSPLUSI:
2294          caseless = TRUE;
2295          codevalue -= OP_STARI - OP_STAR;
2296
2297          /* Fall through */
2298        case OP_PLUS:        case OP_PLUS:
2299        case OP_MINPLUS:        case OP_MINPLUS:
2300        case OP_POSPLUS:        case OP_POSPLUS:
# Line 2151  for (;;) Line 2306  for (;;)
2306        if (clen > 0)        if (clen > 0)
2307          {          {
2308          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2309          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2310            {            {
2311  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2312            if (utf8 && d >= 128)            if (utf && d >= 128)
2313              {              {
2314  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2315              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2316  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2317              }              }
2318            else            else
2319  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2320            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2321            }            }
2322          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2323            {            {
# Line 2179  for (;;) Line 2334  for (;;)
2334        break;        break;
2335
2336        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2337          case OP_QUERYI:
2338          case OP_MINQUERYI:
2339          case OP_POSQUERYI:
2340          case OP_NOTQUERYI:
2341          case OP_NOTMINQUERYI:
2342          case OP_NOTPOSQUERYI:
2343          caseless = TRUE;
2344          codevalue -= OP_STARI - OP_STAR;
2345          /* Fall through */
2346        case OP_QUERY:        case OP_QUERY:
2347        case OP_MINQUERY:        case OP_MINQUERY:
2348        case OP_POSQUERY:        case OP_POSQUERY:
# Line 2189  for (;;) Line 2353  for (;;)
2353        if (clen > 0)        if (clen > 0)
2354          {          {
2355          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2356          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2357            {            {
2358  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2359            if (utf8 && d >= 128)            if (utf && d >= 128)
2360              {              {
2361  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2362              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2363  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2364              }              }
2365            else            else
2366  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2367            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2368            }            }
2369          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2370            {            {
# Line 2215  for (;;) Line 2379  for (;;)
2379        break;        break;
2380
2381        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2382          case OP_STARI:
2383          case OP_MINSTARI:
2384          case OP_POSSTARI:
2385          case OP_NOTSTARI:
2386          case OP_NOTMINSTARI:
2387          case OP_NOTPOSSTARI:
2388          caseless = TRUE;
2389          codevalue -= OP_STARI - OP_STAR;
2390          /* Fall through */
2391        case OP_STAR:        case OP_STAR:
2392        case OP_MINSTAR:        case OP_MINSTAR:
2393        case OP_POSSTAR:        case OP_POSSTAR:
# Line 2225  for (;;) Line 2398  for (;;)
2398        if (clen > 0)        if (clen > 0)
2399          {          {
2400          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2401          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2402            {            {
2403  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2404            if (utf8 && d >= 128)            if (utf && d >= 128)
2405              {              {
2406  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2407              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2408  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2409              }              }
2410            else            else
2411  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2412            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2413            }            }
2414          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2415            {            {
# Line 2251  for (;;) Line 2424  for (;;)
2424        break;        break;
2425
2426        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2427          case OP_EXACTI:
2428          case OP_NOTEXACTI:
2429          caseless = TRUE;
2430          codevalue -= OP_STARI - OP_STAR;
2431          /* Fall through */
2432        case OP_EXACT:        case OP_EXACT:
2433        case OP_NOTEXACT:        case OP_NOTEXACT:
2434        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2435        if (clen > 0)        if (clen > 0)
2436          {          {
2437          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2438          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2439            {            {
2440  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2441            if (utf8 && d >= 128)            if (utf && d >= 128)
2442              {              {
2443  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2444              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2445  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2446              }              }
2447            else            else
2448  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2449            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2450            }            }
2451          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2452            {            {
2453            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2454              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2455            else            else
2457            }            }
# Line 2281  for (;;) Line 2459  for (;;)
2459        break;        break;
2460
2461        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2462          case OP_UPTOI:
2463          case OP_MINUPTOI:
2464          case OP_POSUPTOI:
2465          case OP_NOTUPTOI:
2466          case OP_NOTMINUPTOI:
2467          case OP_NOTPOSUPTOI:
2468          caseless = TRUE;
2469          codevalue -= OP_STARI - OP_STAR;
2470          /* Fall through */
2471        case OP_UPTO:        case OP_UPTO:
2472        case OP_MINUPTO:        case OP_MINUPTO:
2473        case OP_POSUPTO:        case OP_POSUPTO:
2474        case OP_NOTUPTO:        case OP_NOTUPTO:
2475        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2476        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2477        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2478        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2479        if (clen > 0)        if (clen > 0)
2480          {          {
2481          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2482          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2483            {            {
2484  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2485            if (utf8 && d >= 128)            if (utf && d >= 128)
2486              {              {
2487  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2488              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2489  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2490              }              }
2491            else            else
2492  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2493            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2494            }            }
2495          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2496            {            {
# Line 2313  for (;;) Line 2500  for (;;)
2500              next_active_state--;              next_active_state--;
2501              }              }
2502            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2503              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2504            else            else
2506            }            }
# Line 2330  for (;;) Line 2517  for (;;)
2517          {          {
2518          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2519          int next_state_offset;          int next_state_offset;
2520          const uschar *ecode;          const pcre_uchar *ecode;
2521
2522          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2523          can set isinclass from it. */          can set isinclass from it. */
2524
2525          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2526            {            {
2527            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2528            if (clen > 0)            if (clen > 0)
2529              {              {
2530              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2531                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2532              }              }
2533            }            }
2534
# Line 2352  for (;;) Line 2539  for (;;)
2539          else          else
2540           {           {
2541           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2542           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2543           }           }
2544
2545          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
2546          points to the byte after the end of the class. If there is a          points to the byte after the end of the class. If there is a
2547          quantifier, this is where it will be. */          quantifier, this is where it will be. */
2548
2549          next_state_offset = ecode - start_code;          next_state_offset = (int)(ecode - start_code);
2550
2551          switch (*ecode)          switch (*ecode)
2552            {            {
# Line 2386  for (;;) Line 2573  for (;;)
2573            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2574            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2575            if (count >= GET2(ecode, 1))            if (count >= GET2(ecode, 1))
2576              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2577            if (isinclass)            if (isinclass)
2578              {              {
2579              int max = GET2(ecode, 3);              int max = GET2(ecode, 1 + IMM2_SIZE);
2580              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2581                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2582              else              else
2584              }              }
# Line 2422  for (;;) Line 2609  for (;;)
2609          int rc;          int rc;
2610          int local_offsets[2];          int local_offsets[2];
2611          int local_workspace[1000];          int local_workspace[1000];
2612          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2613
2614          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2615
# Line 2430  for (;;) Line 2617  for (;;)
2617            md,                                   /* static match data */            md,                                   /* static match data */
2618            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2619            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2620            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2621            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2622            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2623            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2624            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2625            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2626
2627          if (rc == PCRE_ERROR_DFA_UITEM) return rc;          if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2628          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2630          }          }
2631        break;        break;
2632
# Line 2461  for (;;) Line 2646  for (;;)
2647            {            {
2648            rrc = 0;            rrc = 0;
2649            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2650              {              {
2651              pcre_callout_block cb;              PUBL(callout_block) cb;
2652              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2654              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2655    #ifdef COMPILE_PCRE8
2656              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2657              cb.subject_length   = end_subject - start_subject;  #else
2658              cb.start_match      = current_subject - start_subject;              cb.subject          = (PCRE_SPTR16)start_subject;
2659              cb.current_position = ptr - start_subject;  #endif
2660                cb.subject_length   = (int)(end_subject - start_subject);
2661                cb.start_match      = (int)(current_subject - start_subject);
2662                cb.current_position = (int)(ptr - start_subject);
2663              cb.pattern_position = GET(code, LINK_SIZE + 3);              cb.pattern_position = GET(code, LINK_SIZE + 3);
2664              cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);              cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2665              cb.capture_top      = 1;              cb.capture_top      = 1;
2666              cb.capture_last     = -1;              cb.capture_last     = -1;
2667              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2668              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              cb.mark             = NULL;   /* No (*MARK) support */
2669                if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2670              }              }
2671            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2672            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2673            }            }
2674
# Line 2500  for (;;) Line 2690  for (;;)
2690
2691          else if (condcode == OP_RREF || condcode == OP_NRREF)          else if (condcode == OP_RREF || condcode == OP_NRREF)
2692            {            {
2693            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2694            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2695            if (recursing > 0)            if (md->recursive != NULL)
2698            }            }
2699
# Line 2512  for (;;) Line 2702  for (;;)
2702          else          else
2703            {            {
2704            int rc;            int rc;
2705            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2706            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2707
2708            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2709
# Line 2521  for (;;) Line 2711  for (;;)
2711              md,                                   /* fixed match data */              md,                                   /* fixed match data */
2712              asscode,                              /* this subexpression's code */              asscode,                              /* this subexpression's code */
2713              ptr,                                  /* where we currently are */              ptr,                                  /* where we currently are */
2714              ptr - start_subject,                  /* start offset */              (int)(ptr - start_subject),           /* start offset */
2715              local_offsets,                        /* offset vector */              local_offsets,                        /* offset vector */
2716              sizeof(local_offsets)/sizeof(int),    /* size of same */              sizeof(local_offsets)/sizeof(int),    /* size of same */
2717              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2718              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2719              ims,                                  /* the current ims flags */              rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2720
2721            if (rc == PCRE_ERROR_DFA_UITEM) return rc;            if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2722            if ((rc >= 0) ==            if ((rc >= 0) ==
2723                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2725            else            else
2727            }            }
# Line 2543  for (;;) Line 2731  for (;;)
2731        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2732        case OP_RECURSE:        case OP_RECURSE:
2733          {          {
2734            dfa_recursion_info *ri;
2735          int local_offsets[1000];          int local_offsets[1000];
2736          int local_workspace[1000];          int local_workspace[1000];
2737            const pcre_uchar *callpat = start_code + GET(code, 1);
2738            int recno = (callpat == md->start_code)? 0 :
2740          int rc;          int rc;
2741
2742          DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2743            recursing + 1));
2744            /* Check for repeating a recursion without advancing the subject
2745            pointer. This should catch convoluted mutual recursions. (Some simple
2746            cases are caught at compile time.) */
2747
2748            for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2749              if (recno == ri->group_num && ptr == ri->subject_position)
2750                return PCRE_ERROR_RECURSELOOP;
2751
2752            /* Remember this recursion and where we started it so as to
2753            catch infinite loops. */
2754
2755            new_recursive.group_num = recno;
2756            new_recursive.subject_position = ptr;
2757            new_recursive.prevrec = md->recursive;
2758            md->recursive = &new_recursive;
2759
2760          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2761            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2762            start_code + GET(code, 1),            /* this subexpression's code */            callpat,                              /* this subexpression's code */
2763            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2764            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2765            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2766            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2767            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2768            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2769            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
2770            rlevel,                               /* function recursion level */
2771            recursing + 1);                       /* regex recurse level */          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2772
2773          DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,          DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2774            recursing + 1, rc));            rc));
2775
2776          /* Ran out of internal offsets */          /* Ran out of internal offsets */
2777
# Line 2578  for (;;) Line 2785  for (;;)
2785            {            {
2786            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2787              {              {
const uschar *p = start_subject + local_offsets[rc];
const uschar *pp = start_subject + local_offsets[rc+1];
2788              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2789              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2790                const pcre_uchar *p = start_subject + local_offsets[rc];
2791                const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2792                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2793    #endif
2794              if (charcount > 0)              if (charcount > 0)
2795                {                {
# Line 2597  for (;;) Line 2806  for (;;)
2806        break;        break;
2807
2808        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2809          case OP_BRAPOS:
2810          case OP_SBRAPOS:
2811          case OP_CBRAPOS:
2812          case OP_SCBRAPOS:
2813          case OP_BRAPOSZERO:
2814            {
2815            int charcount, matched_count;
2816            const pcre_uchar *local_ptr = ptr;
2817            BOOL allow_zero;
2818
2819            if (codevalue == OP_BRAPOSZERO)
2820              {
2821              allow_zero = TRUE;
2822              codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2823              }
2824            else allow_zero = FALSE;
2825
2826            /* Loop to match the subpattern as many times as possible as if it were
2827            a complete pattern. */
2828
2829            for (matched_count = 0;; matched_count++)
2830              {
2831              int local_offsets[2];
2832              int local_workspace[1000];
2833
2834              int rc = internal_dfa_exec(
2835                md,                                   /* fixed match data */
2836                code,                                 /* this subexpression's code */
2837                local_ptr,                            /* where we currently are */
2838                (int)(ptr - start_subject),           /* start offset */
2839                local_offsets,                        /* offset vector */
2840                sizeof(local_offsets)/sizeof(int),    /* size of same */
2841                local_workspace,                      /* workspace vector */
2842                sizeof(local_workspace)/sizeof(int),  /* size of same */
2843                rlevel);                              /* function recursion level */
2844
2845              /* Failed to match */
2846
2847              if (rc < 0)
2848                {
2849                if (rc != PCRE_ERROR_NOMATCH) return rc;
2850                break;
2851                }
2852
2853              /* Matched: break the loop if zero characters matched. */
2854
2855              charcount = local_offsets[1] - local_offsets[0];
2856              if (charcount == 0) break;
2857              local_ptr += charcount;    /* Advance temporary position ptr */
2858              }
2859
2860            /* At this point we have matched the subpattern matched_count
2861            times, and local_ptr is pointing to the character after the end of the
2862            last match. */
2863
2864            if (matched_count > 0 || allow_zero)
2865              {
2866              const pcre_uchar *end_subpattern = code;
2867              int next_state_offset;
2868
2869              do { end_subpattern += GET(end_subpattern, 1); }
2870                while (*end_subpattern == OP_ALT);
2871              next_state_offset =
2872                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2873
2874              /* Optimization: if there are no more active states, and there
2875              are no new states yet set up, then skip over the subject string
2876              right here, to save looping. Otherwise, set up the new state to swing
2877              into action when the end of the matched substring is reached. */
2878
2879              if (i + 1 >= active_count && new_count == 0)
2880                {
2881                ptr = local_ptr;
2882                clen = 0;
2884                }
2885              else
2886                {
2887                const pcre_uchar *p = ptr;
2888                const pcre_uchar *pp = local_ptr;
2889                charcount = (int)(pp - p);
2890    #ifdef SUPPORT_UTF
2891                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2892    #endif
2893                ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2894                }
2895              }
2896            }
2897          break;
2898
2899          /*-----------------------------------------------------------------*/
2900        case OP_ONCE:        case OP_ONCE:
2901          case OP_ONCE_NC:
2902          {          {
2903          int local_offsets[2];          int local_offsets[2];
2904          int local_workspace[1000];          int local_workspace[1000];
# Line 2606  for (;;) Line 2907  for (;;)
2907            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2908            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2909            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2910            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2911            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2912            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2913            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2914            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2915            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2916
2917          if (rc >= 0)          if (rc >= 0)
2918            {            {
2919            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2920            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2921            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2922
2923            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2924              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
2925            next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;            next_state_offset =
2926                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2927
2928            /* If the end of this subpattern is KETRMAX or KETRMIN, we must            /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2929            arrange for the repeat state also to be added to the relevant list.            arrange for the repeat state also to be added to the relevant list.
# Line 2631  for (;;) Line 2931  for (;;)
2931
2932            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2933                                   *end_subpattern == OP_KETRMIN)?                                   *end_subpattern == OP_KETRMIN)?
2934              end_subpattern - start_code - GET(end_subpattern, 1) : -1;              (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
2935
2936            /* If we have matched an empty string, add the next state at the            /* If we have matched an empty string, add the next state at the
2937            current character pointer. This is important so that the duplicate            current character pointer. This is important so that the duplicate
# Line 2646  for (;;) Line 2946  for (;;)
2946            /* Optimization: if there are no more active states, and there            /* Optimization: if there are no more active states, and there
2947            are no new states yet set up, then skip over the subject string            are no new states yet set up, then skip over the subject string
2948            right here, to save looping. Otherwise, set up the new state to swing            right here, to save looping. Otherwise, set up the new state to swing
2949            into action when the end of the substring is reached. */            into action when the end of the matched substring is reached. */
2950
2951            else if (i + 1 >= active_count && new_count == 0)            else if (i + 1 >= active_count && new_count == 0)
2952              {              {
# Line 2669  for (;;) Line 2969  for (;;)
2969              }              }
2970            else            else
2971              {              {
2972              const uschar *p = start_subject + local_offsets[0];  #ifdef SUPPORT_UTF
2973              const uschar *pp = start_subject + local_offsets[1];              const pcre_uchar *p = start_subject + local_offsets[0];
2974              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;              const pcre_uchar *pp = start_subject + local_offsets[1];
2975                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2976    #endif
2978              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
2979                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
2980              }              }

2981            }            }
2982          else if (rc != PCRE_ERROR_NOMATCH) return rc;          else if (rc != PCRE_ERROR_NOMATCH) return rc;
2983          }          }
# Line 2688  for (;;) Line 2989  for (;;)
2989
2990        case OP_CALLOUT:        case OP_CALLOUT:
2991        rrc = 0;        rrc = 0;
2992        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
2993          {          {
2994          pcre_callout_block cb;          PUBL(callout_block) cb;
2995          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2996          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2997          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2998    #ifdef COMPILE_PCRE8
2999          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
3000          cb.subject_length   = end_subject - start_subject;  #else
3001          cb.start_match      = current_subject - start_subject;          cb.subject          = (PCRE_SPTR16)start_subject;
3002          cb.current_position = ptr - start_subject;  #endif
3003            cb.subject_length   = (int)(end_subject - start_subject);
3004            cb.start_match      = (int)(current_subject - start_subject);
3005            cb.current_position = (int)(ptr - start_subject);
3006          cb.pattern_position = GET(code, 2);          cb.pattern_position = GET(code, 2);
3007          cb.next_item_length = GET(code, 2 + LINK_SIZE);          cb.next_item_length = GET(code, 2 + LINK_SIZE);
3008          cb.capture_top      = 1;          cb.capture_top      = 1;
3009          cb.capture_last     = -1;          cb.capture_last     = -1;
3010          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
3011          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          cb.mark             = NULL;   /* No (*MARK) support */
3012            if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
3013          }          }
3014        if (rrc == 0)        if (rrc == 0)
3015          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3016        break;        break;
3017
3018
# Line 2731  for (;;) Line 3037  for (;;)
3037
3038    The "could_continue" variable is true if a state could have continued but    The "could_continue" variable is true if a state could have continued but
3039    for the fact that the end of the subject was reached. */    for the fact that the end of the subject was reached. */
3040
3041    if (new_count <= 0)    if (new_count <= 0)
3042      {      {
3043      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3044          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3045          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3046          (                                            /* either... */          (                                            /* either... */
3047          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2743  for (;;) Line 3049  for (;;)
3049          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3050           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3051          ) &&                                         /* And... */          ) &&                                         /* And... */
3052          ptr >= end_subject &&                     /* Reached end of subject */          (
3053          ptr > current_subject)                    /* Matched non-empty string */          partial_newline ||                           /* Either partial NL */
3054              (                                          /* or ... */
3055              ptr >= end_subject &&                /* End of subject and */
3056              ptr > md->start_used_ptr)            /* Inspected non-empty string */
3057              )
3058            )
3059        {        {
3060        if (offsetcount >= 2)        if (offsetcount >= 2)
3061          {          {
3062          offsets[0] = md->start_used_ptr - start_subject;          offsets[0] = (int)(md->start_used_ptr - start_subject);
3063          offsets[1] = end_subject - start_subject;          offsets[1] = (int)(end_subject - start_subject);
3064          }          }
3065        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
3066        }        }
# Line 2803  Returns:          > 0 => number of match Line 3114  Returns:          > 0 => number of match
3114                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3115  */  */
3116
3117    #ifdef COMPILE_PCRE8
3118  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3119  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3120    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3121    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3122    #else
3123    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3124    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3125      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3126      int offsetcount, int *workspace, int wscount)
3127    #endif
3128  {  {
3129  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3130  dfa_match_data match_block;  dfa_match_data match_block;
3131  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3132  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3133  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;

pcre_study_data internal_study;
3134  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
real_pcre internal_re;
3135
3136  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3137  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3138  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3139  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3140  int first_byte = -1;  pcre_uchar first_char = 0;
3141  int req_byte = -1;  pcre_uchar first_char2 = 0;
3142  int req_byte2 = -1;  pcre_uchar req_char = 0;
3143    pcre_uchar req_char2 = 0;
3144  int newline;  int newline;
3145
3146  /* Plausibility checks */  /* Plausibility checks */
# Line 2834  if (re == NULL || subject == NULL || wor Line 3150  if (re == NULL || subject == NULL || wor
3150     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3151  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3152  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3153    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3154
3155  /* We need to find the pointer to any study data before we test for byte  /* We need to find the pointer to any study data before we test for byte
3156  flipping, so we scan the extra_data block first. This may set two fields in the  flipping, so we scan the extra_data block first. This may set two fields in the
# Line 2855  if (extra_data != NULL) Line 3172  if (extra_data != NULL)
3172      md->callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
3173    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
3174      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3175      ((pcre_extra *)extra_data)->flags &= ~PCRE_EXTRA_USED_JIT;  /* No JIT support here */
3176    }    }
3177
3178  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
3179  test for a regex that was compiled on a host of opposite endianness. If this is  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3180  the case, flipped values are put in internal_re and internal_study if there was  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3181  study data too. */  means that the pattern is likely compiled with different endianness. */
3182
3183  if (re->magic_number != MAGIC_NUMBER)  if (re->magic_number != MAGIC_NUMBER)
3184    {    return re->magic_number == REVERSED_MAGIC_NUMBER?
3186    if (re == NULL) return PCRE_ERROR_BADMAGIC;  if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
if (study != NULL) study = &internal_study;
}
3187
3188  /* Set some local values */  /* Set some local values */
3189
3190  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3191  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3192  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3193
3194  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3195  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3196    utf = (re->options & PCRE_UTF8) != 0;
3197  #else  #else
3198  utf8 = FALSE;  utf = FALSE;
3199  #endif  #endif
3200
3201  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 2886  anchored = (options & (PCRE_ANCHORED|PCR Line 3203  anchored = (options & (PCRE_ANCHORED|PCR
3203
3204  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3205
3206  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3207      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3208  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3209  md->end_subject = end_subject;  md->end_subject = end_subject;
3210  md->start_offset = start_offset;  md->start_offset = start_offset;
3211  md->moptions = options;  md->moptions = options;
# Line 2949  else Line 3266  else
3266  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3267  back the character offset. */  back the character offset. */
3268
3269  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3270  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3271    {    {
3272    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    int erroroffset;
3273      return PCRE_ERROR_BADUTF8;    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3274    if (start_offset > 0 && start_offset < length)    if (errorcode != 0)
3275      {      {
3276      int tb = ((uschar *)subject)[start_offset];      if (offsetcount >= 2)
if (tb > 127)
3277        {        {
3278        tb &= 0xc0;        offsets[0] = erroroffset;
3279        if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;        offsets[1] = errorcode;
3280        }        }
3281        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
3283      }      }
3284      if (start_offset > 0 && start_offset < length &&
3285            NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3287    }    }
3288  #endif  #endif
3289
# Line 2970  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3291  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3291  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3292  in other programs later. */  in other programs later. */
3293
3294  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3295
3296  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3297  used in a loop when finding where to start. */  where to start. */
3298
lcc = md->tables + lcc_offset;
3299  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3300  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3301
# Line 2989  if (!anchored) Line 3309  if (!anchored)
3309    {    {
3310    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3311      {      {
3312      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3313      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
3314        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3315          {
3316          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3317    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3318          if (utf && first_char > 127)
3319            first_char2 = UCD_OTHERCASE(first_char);
3320    #endif
3321          }
3322      }      }
3323    else    else
3324      {      {
# Line 3006  character" set. */ Line 3333  character" set. */
3333
3334  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3335    {    {
3336    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3337    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3338    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3339        {
3340        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3341    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3342        if (utf && req_char > 127)
3343          req_char2 = UCD_OTHERCASE(req_char);
3344    #endif
3345        }
3346    }    }
3347
3348  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 3021  for (;;) Line 3355  for (;;)
3355
3356    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3357      {      {
3358      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3359
3360      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3361      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 3030  for (;;) Line 3364  for (;;)
3364
3365      if (firstline)      if (firstline)
3366        {        {
3367        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3368  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3369        if (utf8)        if (utf)
3370          {          {
3371          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3372            {            {
3373            t++;            t++;
3374            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3375            }            }
3376          }          }
3377        else        else
# Line 3048  for (;;) Line 3382  for (;;)
3382
3383      /* There are some optimizations that avoid running the match if a known      /* There are some optimizations that avoid running the match if a known
3384      starting point is not found. However, there is an option that disables      starting point is not found. However, there is an option that disables
3385      these, for testing and for ensuring that all callouts do actually occur. */      these, for testing and for ensuring that all callouts do actually occur.
3386        The option can be set in the regex by (*NO_START_OPT) or passed in
3387        match-time options. */
3388
3389      if ((options & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3390        {        {
3391        /* Advance to a known first byte. */        /* Advance to a known first char. */
3392
3393        if (first_byte >= 0)        if (has_first_char)
3394          {          {
3395          if (first_byte_caseless)          if (first_char != first_char2)
3396            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3397                   lcc[*current_subject] != first_byte)                *current_subject != first_char && *current_subject != first_char2)
3398              current_subject++;              current_subject++;
3399          else          else
3400            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3401                   *current_subject != first_byte)                   *current_subject != first_char)
3402              current_subject++;              current_subject++;
3403          }          }
3404
# Line 3072  for (;;) Line 3408  for (;;)
3408          {          {
3409          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3410            {            {
3411  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3412            if (utf8)            if (utf)
3413              {              {
3414              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3415                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3416                {                {
3417                current_subject++;                current_subject++;
3418                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3419                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
current_subject++;
3420                }                }
3421              }              }
3422            else            else
# Line 3108  for (;;) Line 3443  for (;;)
3443          while (current_subject < end_subject)          while (current_subject < end_subject)
3444            {            {
3445            register unsigned int c = *current_subject;            register unsigned int c = *current_subject;
3446            if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;  #ifndef COMPILE_PCRE8
3447              else break;            if (c > 255) c = 255;
3448    #endif
3449              if ((start_bits[c/8] & (1 << (c&7))) == 0)
3450                {
3451                current_subject++;
3452    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3453                /* In non 8-bit mode, the iteration will stop for
3454                characters > 255 at the beginning or not stop at all. */
3455                if (utf)
3456                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3457                    current_subject++);
3458    #endif
3459                }
3460              else break;
3461            }            }
3462          }          }
3463        }        }
# Line 3122  for (;;) Line 3470  for (;;)
3470      disabling is explicitly requested (and of course, by the test above, this      disabling is explicitly requested (and of course, by the test above, this
3471      code is not obeyed when restarting after a partial match). */      code is not obeyed when restarting after a partial match). */
3472
3473      if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3474          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3475        {        {
3476        /* If the pattern was studied, a minimum subject length may be set. This        /* If the pattern was studied, a minimum subject length may be set. This
# Line 3134  for (;;) Line 3482  for (;;)
3482            (pcre_uint32)(end_subject - current_subject) < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3483          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3484
3485        /* If req_byte is set, we know that that character must appear in the        /* If req_char is set, we know that that character must appear in the
3486        subject for the match to succeed. If the first character is set, req_byte        subject for the match to succeed. If the first character is set, req_char
3487        must be later in the subject; otherwise the test starts at the match        must be later in the subject; otherwise the test starts at the match
3488        point. This optimization can save a huge amount of work in patterns with        point. This optimization can save a huge amount of work in patterns with
3489        nested unlimited repeats that aren't going to match. Writing separate        nested unlimited repeats that aren't going to match. Writing separate
# Line 3147  for (;;) Line 3495  for (;;)
3495        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3496        string... so we don't do this when the string is sufficiently long. */        string... so we don't do this when the string is sufficiently long. */
3497
3498        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3499          {          {
3500          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3501
3502          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3503          place we found it at last time. */          place we found it at last time. */
3504
3505          if (p > req_byte_ptr)          if (p > req_char_ptr)
3506            {            {
3507            if (req_byte_caseless)            if (req_char != req_char2)
3508              {              {
3509              while (p < end_subject)              while (p < end_subject)
3510                {                {
3511                register int pp = *p++;                register int pp = *p++;
3512                if (pp == req_byte || pp == req_byte2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3513                }                }
3514              }              }
3515            else            else
3516              {              {
3517              while (p < end_subject)              while (p < end_subject)
3518                {                {
3519                if (*p++ == req_byte) { p--; break; }                if (*p++ == req_char) { p--; break; }
3520                }                }
3521              }              }
3522
# Line 3181  for (;;) Line 3529  for (;;)
3529            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3530            the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3531
3532            req_byte_ptr = p;            req_char_ptr = p;
3533            }            }
3534          }          }
3535        }        }
# Line 3190  for (;;) Line 3538  for (;;)
3538    /* OK, now we can do the business */    /* OK, now we can do the business */
3539
3540    md->start_used_ptr = current_subject;    md->start_used_ptr = current_subject;
3541      md->recursive = NULL;
3542
3543    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3544      md,                                /* fixed match data */      md,                                /* fixed match data */
# Line 3200  for (;;) Line 3549  for (;;)
3549      offsetcount,                       /* size of same */      offsetcount,                       /* size of same */
3550      workspace,                         /* workspace vector */      workspace,                         /* workspace vector */
3551      wscount,                           /* size of same */      wscount,                           /* size of same */
3552      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      0);                                /* function recurse level */
0,                                 /* function recurse level */
0);                                /* regex recurse level */
3553
3554    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3555    on only if not anchored. */    on only if not anchored. */
# Line 3214  for (;;) Line 3561  for (;;)
3561
3562    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3563    current_subject++;    current_subject++;
3564    if (utf8)  #ifdef SUPPORT_UTF
3565      if (utf)
3566      {      {
3567      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3568        current_subject++;        current_subject++);
3569      }      }
3570    #endif
3571    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3572
3573    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does

Legend:
 Removed from v.518 | changed lines | Added in v.922