# Diff of /code/trunk/pcre_dfa_exec.c

revision 553 by ph10, Fri Oct 22 15:57:50 2010 UTC revision 850 by zherczeg, Wed Jan 4 17:29:11 2012 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
11
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 113  small value. Non-zero values in the tabl Line 113  small value. Non-zero values in the tabl
113  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
114  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
115
116  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
117    0,                             /* End                                    */    0,                             /* End                                    */
118    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
119    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 121  static const uschar coptable[] = { Line 121  static const uschar coptable[] = {
121    0, 0,                          /* \P, \p                                 */    0, 0,                          /* \P, \p                                 */
122    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
123    0,                             /* \X                                     */    0,                             /* \X                                     */
124    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
125    1,                             /* Char                                   */    1,                             /* Char                                   */
126    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
127    1,                             /* not                                    */    1,                             /* not                                    */
128      1,                             /* noti                                   */
129    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
130    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
131    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
132    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
133      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
134      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
135      1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
136      1+IMM2_SIZE,                   /* exact I                                */
137      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
138    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
139    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
140    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
141    1, 1, 1, 3,                    /* NOT *+, ++, ?+, updo+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
142      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
143      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
144      1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
145      1+IMM2_SIZE,                   /* NOT exact I                            */
146      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
147    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
148    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
149    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
150    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
151      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
152    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
153    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
154    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 144  static const uschar coptable[] = { Line 156  static const uschar coptable[] = {
156    0,                             /* NCLASS                                 */    0,                             /* NCLASS                                 */
157    0,                             /* XCLASS - variable length               */    0,                             /* XCLASS - variable length               */
158    0,                             /* REF                                    */    0,                             /* REF                                    */
159      0,                             /* REFI                                   */
160    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
161    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
162    0,                             /* Alt                                    */    0,                             /* Alt                                    */
163    0,                             /* Ket                                    */    0,                             /* Ket                                    */
164    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
165    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
166      0,                             /* KetRpos                                */
167      0,                             /* Reverse                                */
168    0,                             /* Assert                                 */    0,                             /* Assert                                 */
169    0,                             /* Assert not                             */    0,                             /* Assert not                             */
170    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
171    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
172    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
173    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
174    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
175    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
176    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
177    0,                             /* DEF                                    */    0,                             /* DEF                                    */
178    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
179    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
180    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
181    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
182      0, 0                           /* CLOSE, SKIPZERO  */
183  };  };
184
185  /* This table identifies those opcodes that inspect a character. It is used to  /* This table identifies those opcodes that inspect a character. It is used to
# Line 171  remember the fact that a character could Line 187  remember the fact that a character could
187  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
188  two tables that follow must also be modified. */  two tables that follow must also be modified. */
189
190  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
191    0,                             /* End                                    */    0,                             /* End                                    */
192    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
193    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
# Line 179  static const uschar poptable[] = { Line 195  static const uschar poptable[] = {
195    1, 1,                          /* \P, \p                                 */    1, 1,                          /* \P, \p                                 */
196    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
197    1,                             /* \X                                     */    1,                             /* \X                                     */
198    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
199    1,                             /* Char                                   */    1,                             /* Char                                   */
200    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
201    1,                             /* not                                    */    1,                             /* not                                    */
202      1,                             /* noti                                   */
203    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
204    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
205    1, 1, 1,                       /* upto, minupto, exact                   */    1, 1, 1,                       /* upto, minupto, exact                   */
206    1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */    1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
207      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
208      1, 1, 1,                       /* upto I, minupto I, exact I             */
209      1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
210    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
211    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
212    1, 1, 1,                       /* NOT upto, minupto, exact               */    1, 1, 1,                       /* NOT upto, minupto, exact               */
213    1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */    1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
214      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
215      1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
216      1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
217    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
218    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
219    1, 1, 1,                       /* Type upto, minupto, exact              */    1, 1, 1,                       /* Type upto, minupto, exact              */
# Line 202  static const uschar poptable[] = { Line 225  static const uschar poptable[] = {
225    1,                             /* NCLASS                                 */    1,                             /* NCLASS                                 */
226    1,                             /* XCLASS - variable length               */    1,                             /* XCLASS - variable length               */
227    0,                             /* REF                                    */    0,                             /* REF                                    */
228      0,                             /* REFI                                   */
229    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
230    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
231    0,                             /* Alt                                    */    0,                             /* Alt                                    */
232    0,                             /* Ket                                    */    0,                             /* Ket                                    */
233    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
234    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
235      0,                             /* KetRpos                                */
236      0,                             /* Reverse                                */
237    0,                             /* Assert                                 */    0,                             /* Assert                                 */
238    0,                             /* Assert not                             */    0,                             /* Assert not                             */
239    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
240    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
241    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
242    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
243    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
244    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
245    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
246    0,                             /* DEF                                    */    0,                             /* DEF                                    */
247    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
248    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
249    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
250    0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */    0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
251      0, 0                           /* CLOSE, SKIPZERO                        */
252  };  };
253
254  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
255  and \w */  and \w */
256
257  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
258    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
259    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
260    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 235  static const uschar toptable1[] = { Line 262  static const uschar toptable1[] = {
262    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
263  };  };
264
265  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
266    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
267    ctype_digit, 0,    ctype_digit, 0,
268    ctype_space, 0,    ctype_space, 0,
# Line 252  these structures in, is a vector of ints Line 279  these structures in, is a vector of ints
279  typedef struct stateblock {  typedef struct stateblock {
280    int offset;                     /* Offset to opcode */    int offset;                     /* Offset to opcode */
281    int count;                      /* Count for repeats */    int count;                      /* Count for repeats */
int ims;                        /* ims flag bits */
282    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
283  } stateblock;  } stateblock;
284
# Line 275  Returns:       nothing Line 301  Returns:       nothing
301  */  */
302
303  static void  static void
304  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
305  {  {
306  int c;  int c;
307  while (length-- > 0)  while (length-- > 0)
# Line 308  Arguments: Line 334  Arguments:
334    offsetcount       size of same    offsetcount       size of same
335    workspace         vector of workspace    workspace         vector of workspace
336    wscount           size of same    wscount           size of same
ims               the current ims flags
337    rlevel            function call recursion level    rlevel            function call recursion level
recursing         regex recursive call level
338
339  Returns:            > 0 => number of match offset pairs placed in offsets  Returns:            > 0 => number of match offset pairs placed in offsets
340                      = 0 => offsets overflowed; longest matches are present                      = 0 => offsets overflowed; longest matches are present
# Line 325  for the current character, one for the f Line 349  for the current character, one for the f
349      { \      { \
350      next_active_state->offset = (x); \      next_active_state->offset = (x); \
351      next_active_state->count  = (y); \      next_active_state->count  = (y); \
next_active_state->ims    = ims; \
352      next_active_state++; \      next_active_state++; \
353      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
354      } \      } \
# Line 336  for the current character, one for the f Line 359  for the current character, one for the f
359      { \      { \
360      next_active_state->offset = (x); \      next_active_state->offset = (x); \
361      next_active_state->count  = (y); \      next_active_state->count  = (y); \
next_active_state->ims    = ims; \
362      next_active_state->data   = (z); \      next_active_state->data   = (z); \
363      next_active_state++; \      next_active_state++; \
364      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
# Line 348  for the current character, one for the f Line 370  for the current character, one for the f
370      { \      { \
371      next_new_state->offset = (x); \      next_new_state->offset = (x); \
372      next_new_state->count  = (y); \      next_new_state->count  = (y); \
next_new_state->ims    = ims; \
373      next_new_state++; \      next_new_state++; \
374      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
375      } \      } \
# Line 359  for the current character, one for the f Line 380  for the current character, one for the f
380      { \      { \
381      next_new_state->offset = (x); \      next_new_state->offset = (x); \
382      next_new_state->count  = (y); \      next_new_state->count  = (y); \
next_new_state->ims    = ims; \
383      next_new_state->data   = (z); \      next_new_state->data   = (z); \
384      next_new_state++; \      next_new_state++; \
385      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
# Line 371  for the current character, one for the f Line 391  for the current character, one for the f
391  static int  static int
392  internal_dfa_exec(  internal_dfa_exec(
393    dfa_match_data *md,    dfa_match_data *md,
394    const uschar *this_start_code,    const pcre_uchar *this_start_code,
395    const uschar *current_subject,    const pcre_uchar *current_subject,
396    int start_offset,    int start_offset,
397    int *offsets,    int *offsets,
398    int offsetcount,    int offsetcount,
399    int *workspace,    int *workspace,
400    int wscount,    int wscount,
401    int ims,    int  rlevel)
int  rlevel,
int  recursing)
402  {  {
403  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
404  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
405
406  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
407  const uschar *ptr;  const pcre_uchar *ptr;
408  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
409
410    dfa_recursion_info new_recursive;
411
412  int active_count, new_count, match_count;  int active_count, new_count, match_count;
413
414  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
415  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
416
417  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
418  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
419  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
420
421  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
422  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
423  #else  #else
424  BOOL utf8 = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426
427  rlevel++;  rlevel++;
# Line 412  wscount = (wscount - (wscount % (INTS_PE Line 432  wscount = (wscount - (wscount % (INTS_PE
432            (2 * INTS_PER_STATEBLOCK);            (2 * INTS_PER_STATEBLOCK);
433
434  DPRINTF(("\n%.*s---------------------\n"  DPRINTF(("\n%.*s---------------------\n"
435    "%.*sCall to internal_dfa_exec f=%d r=%d\n",    "%.*sCall to internal_dfa_exec f=%d\n",
436    rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));    rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
437
438  ctypes = md->tables + ctypes_offset;  ctypes = md->tables + ctypes_offset;
439  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
# Line 426  next_new_state = new_states = active_sta Line 446  next_new_state = new_states = active_sta
446  new_count = 0;  new_count = 0;
447
448  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
449    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
450        *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
451        ? IMM2_SIZE:0);
452
453  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
454  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 454  if (*first_op == OP_REVERSE) Line 476  if (*first_op == OP_REVERSE)
476    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
477    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
478
479  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
480    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
481
482    if (utf8)    if (utf)
483      {      {
484      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
485        {        {
486        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
487        current_subject--;        current_subject--;
488        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
(*current_subject & 0xc0) == 0x80)
current_subject--;
489        }        }
490      }      }
491    else    else
# Line 525  else Line 545  else
545    else    else
546      {      {
547      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
548        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
549            *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
550            ? IMM2_SIZE:0);
551      do      do
552        {        {
553        ADD_NEW((int)(end_code - start_code + length), 0);        ADD_NEW((int)(end_code - start_code + length), 0);
# Line 538  else Line 560  else
560
561  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
562
563  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
564
565  /* Loop for scanning the subject */  /* Loop for scanning the subject */
566
# Line 565  for (;;) Line 587  for (;;)
587
588  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
589    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
590    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
591    printf("\"\n");    printf("\"\n");
592
593    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 586  for (;;) Line 608  for (;;)
608    if (ptr < end_subject)    if (ptr < end_subject)
609      {      {
610      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of bytes in the character */
611  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
612      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
613  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
614      c = *ptr;      c = *ptr;
615      }      }
616    else    else
# Line 605  for (;;) Line 627  for (;;)
627    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
628      {      {
629      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
630      const uschar *code;      BOOL caseless = FALSE;
631        const pcre_uchar *code;
632      int state_offset = current_state->offset;      int state_offset = current_state->offset;
633      int count, codevalue, rrc;      int count, codevalue, rrc;
634
# Line 616  for (;;) Line 639  for (;;)
639          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
640  #endif  #endif
641
/* This variable is referred to implicity in the ADD_xxx macros. */

ims = current_state->ims;

642      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
643      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
644      been skipped". */      been skipped". */
# Line 678  for (;;) Line 697  for (;;)
697      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
698        {        {
699        dlen = 1;        dlen = 1;
700  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
701        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
702  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
703        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
704        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
705          {          {
# Line 725  for (;;) Line 744  for (;;)
744
745  /* ========================================================================== */  /* ========================================================================== */
746        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
747        on with the next opcode. Otherwise, unless we have an empty string and        on with the next opcode. For repeating opcodes, also add the repeat
748          state. Note that KETRPOS will always be encountered at the end of the
749          subpattern, because the possessive subpattern repeats are always handled
750          using recursive calls. Thus, it never adds any new states.
751
752          At the end of the (sub)pattern, unless we have an empty string and
753        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
754        start of the subject, save the match data, shifting up all previous        start of the subject, save the match data, shifting up all previous
755        matches so we always have the longest first. */        matches so we always have the longest first. */
# Line 733  for (;;) Line 757  for (;;)
757        case OP_KET:        case OP_KET:
758        case OP_KETRMIN:        case OP_KETRMIN:
759        case OP_KETRMAX:        case OP_KETRMAX:
760          case OP_KETRPOS:
761        if (code != end_code)        if (code != end_code)
762          {          {
# Line 749  for (;;) Line 774  for (;;)
774                  current_subject > start_subject + md->start_offset)))                  current_subject > start_subject + md->start_offset)))
775            {            {
776            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
777              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
778                match_count = 0;                match_count = 0;
779            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
780            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
# Line 795  for (;;) Line 820  for (;;)
820        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
821        case OP_CBRA:        case OP_CBRA:
822        case OP_SCBRA:        case OP_SCBRA:
824        code += GET(code, 1);        code += GET(code, 1);
825        while (*code == OP_ALT)        while (*code == OP_ALT)
826          {          {
# Line 822  for (;;) Line 847  for (;;)
847
848        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
849        case OP_CIRC:        case OP_CIRC:
850          if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
851            { ADD_ACTIVE(state_offset + 1, 0); }
852          break;
853
854          /*-----------------------------------------------------------------*/
855          case OP_CIRCM:
856        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
857            ((ims & PCRE_MULTILINE) != 0 &&            (ptr != end_subject && WAS_NEWLINE(ptr)))
ptr != end_subject &&
WAS_NEWLINE(ptr)))
858          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
859        break;        break;
860
861        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
862        case OP_EOD:        case OP_EOD:
863        if (ptr >= end_subject)        if (ptr >= end_subject)
864          {          {
865          if ((md->moptions & PCRE_PARTIAL_HARD) != 0)          if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
866            could_continue = TRUE;            could_continue = TRUE;
867          else { ADD_ACTIVE(state_offset + 1, 0); }          else { ADD_ACTIVE(state_offset + 1, 0); }
# Line 840  for (;;) Line 869  for (;;)
869        break;        break;
870
871        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
case OP_OPT:
ims = code[1];
break;

/*-----------------------------------------------------------------*/
872        case OP_SOD:        case OP_SOD:
873        if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }        if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
874        break;        break;
# Line 890  for (;;) Line 913  for (;;)
913            could_continue = TRUE;            could_continue = TRUE;
914          else if (clen == 0 ||          else if (clen == 0 ||
915              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
916                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
917              ))              ))
918            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
919          }          }
920        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))        break;
921
922          /*-----------------------------------------------------------------*/
923          case OP_DOLLM:
924          if ((md->moptions & PCRE_NOTEOL) == 0)
925            {
926            if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
927              could_continue = TRUE;
928            else if (clen == 0 ||
929                ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
930              { ADD_ACTIVE(state_offset + 1, 0); }
931            }
932          else if (IS_NEWLINE(ptr))
933          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
934        break;        break;
935
# Line 925  for (;;) Line 960  for (;;)
960
961          if (ptr > start_subject)          if (ptr > start_subject)
962            {            {
963            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
964            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
965  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
966            if (utf8) BACKCHAR(temp);            if (utf) { BACKCHAR(temp); }
967  #endif  #endif
968            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
969  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 993  for (;;) Line 1028  for (;;)
1028            break;            break;
1029
1030            case PT_GC:            case PT_GC:
1031            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1032            break;            break;
1033
1034            case PT_PC:            case PT_PC:
# Line 1007  for (;;) Line 1042  for (;;)
1042            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1043
1044            case PT_ALNUM:            case PT_ALNUM:
1045            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1046                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1047            break;            break;
1048
1049            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1050            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1051                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1052            break;            break;
1053
1054            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1055            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1056                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1057                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1058            break;            break;
1059
1060            case PT_WORD:            case PT_WORD:
1061            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1062                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1063                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1064            break;            break;
1065
# Line 1126  for (;;) Line 1161  for (;;)
1161                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1162            {            {
1163            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1164              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1165            else            else
1167            }            }
# Line 1137  for (;;) Line 1172  for (;;)
1172        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1173        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1174        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1176        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1177        if (clen > 0)        if (clen > 0)
1178          {          {
# Line 1152  for (;;) Line 1187  for (;;)
1187              next_active_state--;              next_active_state--;
1188              }              }
1189            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1190              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1191            else            else
1193            }            }
# Line 1187  for (;;) Line 1222  for (;;)
1222            break;            break;
1223
1224            case PT_GC:            case PT_GC:
1225            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1226            break;            break;
1227
1228            case PT_PC:            case PT_PC:
# Line 1201  for (;;) Line 1236  for (;;)
1236            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1237
1238            case PT_ALNUM:            case PT_ALNUM:
1239            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1240                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1241            break;            break;
1242
1243            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1244            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1245                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1246            break;            break;
1247
1248            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1249            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1250                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1251                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1252            break;            break;
1253
1254            case PT_WORD:            case PT_WORD:
1255            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1256                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1257                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1258            break;            break;
1259
# Line 1250  for (;;) Line 1285  for (;;)
1285        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1286        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1287          {          {
1288          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1289          int ncount = 0;          int ncount = 0;
1290          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1291            {            {
# Line 1434  for (;;) Line 1469  for (;;)
1469            break;            break;
1470
1471            case PT_GC:            case PT_GC:
1472            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1473            break;            break;
1474
1475            case PT_PC:            case PT_PC:
# Line 1448  for (;;) Line 1483  for (;;)
1483            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1484
1485            case PT_ALNUM:            case PT_ALNUM:
1486            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1487                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1488            break;            break;
1489
1490            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1491            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1492                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1493            break;            break;
1494
1495            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1496            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1497                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1498                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1499            break;            break;
1500
1501            case PT_WORD:            case PT_WORD:
1502            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1503                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1504                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1505            break;            break;
1506
# Line 1506  for (;;) Line 1541  for (;;)
1542        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1543          {          {
1544          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1545          int ncount = 0;          int ncount = 0;
1546          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1547              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1688  for (;;) Line 1723  for (;;)
1723        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1724        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1725        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1726          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1727        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1728        if (clen > 0)        if (clen > 0)
1729          {          {
1730          BOOL OK;          BOOL OK;
1731          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1732          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1733            {            {
1734            case PT_ANY:            case PT_ANY:
1735            OK = TRUE;            OK = TRUE;
# Line 1706  for (;;) Line 1741  for (;;)
1741            break;            break;
1742
1743            case PT_GC:            case PT_GC:
1744            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1745            break;            break;
1746
1747            case PT_PC:            case PT_PC:
1748            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1749            break;            break;
1750
1751            case PT_SC:            case PT_SC:
1752            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1753            break;            break;
1754
1755            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1756
1757            case PT_ALNUM:            case PT_ALNUM:
1758            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1759                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1760            break;            break;
1761
1762            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1763            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1764                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1765            break;            break;
1766
1767            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1768            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1769                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1770                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1771            break;            break;
1772
1773            case PT_WORD:            case PT_WORD:
1774            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1775                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1776                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1777            break;            break;
1778
# Line 1756  for (;;) Line 1791  for (;;)
1791              next_active_state--;              next_active_state--;
1792              }              }
1793            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1794              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1795            else            else
1797            }            }
# Line 1769  for (;;) Line 1804  for (;;)
1804        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1805        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1806        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1807          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1808        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1809        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1810          {          {
1811          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1812          int ncount = 0;          int ncount = 0;
1813          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1814            {            {
# Line 1790  for (;;) Line 1825  for (;;)
1825            nptr += ndlen;            nptr += ndlen;
1826            }            }
1827          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1828            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1829          else          else
1831          }          }
# Line 1803  for (;;) Line 1838  for (;;)
1838        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1839        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1840        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1841          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1842        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1843        if (clen > 0)        if (clen > 0)
1844          {          {
# Line 1830  for (;;) Line 1865  for (;;)
1865              next_active_state--;              next_active_state--;
1866              }              }
1867            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1868              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1869            else            else
1871            break;            break;
# Line 1847  for (;;) Line 1882  for (;;)
1882        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1883        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1884        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1885          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1886        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1887        if (clen > 0)        if (clen > 0)
1888          {          {
# Line 1876  for (;;) Line 1911  for (;;)
1911              next_active_state--;              next_active_state--;
1912              }              }
1913            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1914              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1915            else            else
1917            }            }
# Line 1889  for (;;) Line 1924  for (;;)
1924        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1925        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1926        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1927          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1928        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1929        if (clen > 0)        if (clen > 0)
1930          {          {
# Line 1931  for (;;) Line 1966  for (;;)
1966              next_active_state--;              next_active_state--;
1967              }              }
1968            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1969              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1970            else            else
1972            }            }
# Line 1950  for (;;) Line 1985  for (;;)
1985        break;        break;
1986
1987        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1988        case OP_CHARNC:        case OP_CHARI:
1989        if (clen == 0) break;        if (clen == 0) break;
1990
1991  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1992        if (utf8)        if (utf)
1993          {          {
1994          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1995            {            {
1996            unsigned int othercase;            unsigned int othercase;
1997            if (c < 128) othercase = fcc[c]; else            if (c < 128)
1998                othercase = fcc[c];
1999            /* If we have Unicode property support, we can use it to test the            else
2000            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2001                other case of the character. */
2002  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2003            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2004  #else  #else
2005            othercase = NOTACHAR;              othercase = NOTACHAR;
2006  #endif  #endif
2007
2008            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2009            }            }
2010          }          }
2011        else        else
2012  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2013          /* Not UTF mode */
/* Non-UTF-8 mode */
2014          {          {
2015          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2016              { ADD_NEW(state_offset + 2, 0); }
2017          }          }
2018        break;        break;
2019
# Line 1992  for (;;) Line 2027  for (;;)
2027        case OP_EXTUNI:        case OP_EXTUNI:
2028        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
2029          {          {
2030          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2031          int ncount = 0;          int ncount = 0;
2032          while (nptr < end_subject)          while (nptr < end_subject)
2033            {            {
# Line 2136  for (;;) Line 2171  for (;;)
2171        break;        break;
2172
2173        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2174        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character casefully. This is only used for
2175        characters, that is, we know that d < 256. The character we are        one-byte characters, that is, we know that d < 256. The character we are
2176        checking (c) can be multibyte. */        checking (c) can be multibyte. */
2177
2178        case OP_NOT:        case OP_NOT:
2179        if (clen > 0)        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
{
unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
}
2180        break;        break;
2181
2182        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2183          /* Match a negated single character caselessly. This is only used for
2184          one-byte characters, that is, we know that d < 256. The character we are
2185          checking (c) can be multibyte. */
2186
2187          case OP_NOTI:
2188          if (clen > 0 && c != d && c != fcc[d])
2189            { ADD_NEW(state_offset + dlen + 1, 0); }
2190          break;
2191
2192          /*-----------------------------------------------------------------*/
2193          case OP_PLUSI:
2194          case OP_MINPLUSI:
2195          case OP_POSPLUSI:
2196          case OP_NOTPLUSI:
2197          case OP_NOTMINPLUSI:
2198          case OP_NOTPOSPLUSI:
2199          caseless = TRUE;
2200          codevalue -= OP_STARI - OP_STAR;
2201
2202          /* Fall through */
2203        case OP_PLUS:        case OP_PLUS:
2204        case OP_MINPLUS:        case OP_MINPLUS:
2205        case OP_POSPLUS:        case OP_POSPLUS:
# Line 2160  for (;;) Line 2211  for (;;)
2211        if (clen > 0)        if (clen > 0)
2212          {          {
2213          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2214          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2215            {            {
2216  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2217            if (utf8 && d >= 128)            if (utf && d >= 128)
2218              {              {
2219  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2220              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2221  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2222              }              }
2223            else            else
2224  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2225            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2226            }            }
2227          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2228            {            {
# Line 2188  for (;;) Line 2239  for (;;)
2239        break;        break;
2240
2241        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2242          case OP_QUERYI:
2243          case OP_MINQUERYI:
2244          case OP_POSQUERYI:
2245          case OP_NOTQUERYI:
2246          case OP_NOTMINQUERYI:
2247          case OP_NOTPOSQUERYI:
2248          caseless = TRUE;
2249          codevalue -= OP_STARI - OP_STAR;
2250          /* Fall through */
2251        case OP_QUERY:        case OP_QUERY:
2252        case OP_MINQUERY:        case OP_MINQUERY:
2253        case OP_POSQUERY:        case OP_POSQUERY:
# Line 2198  for (;;) Line 2258  for (;;)
2258        if (clen > 0)        if (clen > 0)
2259          {          {
2260          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2261          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2262            {            {
2263  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2264            if (utf8 && d >= 128)            if (utf && d >= 128)
2265              {              {
2266  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2267              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2268  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2269              }              }
2270            else            else
2271  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2272            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2273            }            }
2274          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2275            {            {
# Line 2224  for (;;) Line 2284  for (;;)
2284        break;        break;
2285
2286        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2287          case OP_STARI:
2288          case OP_MINSTARI:
2289          case OP_POSSTARI:
2290          case OP_NOTSTARI:
2291          case OP_NOTMINSTARI:
2292          case OP_NOTPOSSTARI:
2293          caseless = TRUE;
2294          codevalue -= OP_STARI - OP_STAR;
2295          /* Fall through */
2296        case OP_STAR:        case OP_STAR:
2297        case OP_MINSTAR:        case OP_MINSTAR:
2298        case OP_POSSTAR:        case OP_POSSTAR:
# Line 2234  for (;;) Line 2303  for (;;)
2303        if (clen > 0)        if (clen > 0)
2304          {          {
2305          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2306          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2307            {            {
2308  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2309            if (utf8 && d >= 128)            if (utf && d >= 128)
2310              {              {
2311  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2312              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2313  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2314              }              }
2315            else            else
2316  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2317            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2318            }            }
2319          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2320            {            {
# Line 2260  for (;;) Line 2329  for (;;)
2329        break;        break;
2330
2331        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2332          case OP_EXACTI:
2333          case OP_NOTEXACTI:
2334          caseless = TRUE;
2335          codevalue -= OP_STARI - OP_STAR;
2336          /* Fall through */
2337        case OP_EXACT:        case OP_EXACT:
2338        case OP_NOTEXACT:        case OP_NOTEXACT:
2339        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2340        if (clen > 0)        if (clen > 0)
2341          {          {
2342          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2343          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2344            {            {
2345  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2346            if (utf8 && d >= 128)            if (utf && d >= 128)
2347              {              {
2348  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2349              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2350  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2351              }              }
2352            else            else
2353  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2354            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2355            }            }
2356          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2357            {            {
2358            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2359              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2360            else            else
2362            }            }
# Line 2290  for (;;) Line 2364  for (;;)
2364        break;        break;
2365
2366        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2367          case OP_UPTOI:
2368          case OP_MINUPTOI:
2369          case OP_POSUPTOI:
2370          case OP_NOTUPTOI:
2371          case OP_NOTMINUPTOI:
2372          case OP_NOTPOSUPTOI:
2373          caseless = TRUE;
2374          codevalue -= OP_STARI - OP_STAR;
2375          /* Fall through */
2376        case OP_UPTO:        case OP_UPTO:
2377        case OP_MINUPTO:        case OP_MINUPTO:
2378        case OP_POSUPTO:        case OP_POSUPTO:
2379        case OP_NOTUPTO:        case OP_NOTUPTO:
2380        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2381        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2382        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2383        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2384        if (clen > 0)        if (clen > 0)
2385          {          {
2386          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2387          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2388            {            {
2389  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2390            if (utf8 && d >= 128)            if (utf && d >= 128)
2391              {              {
2392  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2393              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2394  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2395              }              }
2396            else            else
2397  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2398            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2399            }            }
2400          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2401            {            {
# Line 2322  for (;;) Line 2405  for (;;)
2405              next_active_state--;              next_active_state--;
2406              }              }
2407            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2408              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2409            else            else
2411            }            }
# Line 2339  for (;;) Line 2422  for (;;)
2422          {          {
2423          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2424          int next_state_offset;          int next_state_offset;
2425          const uschar *ecode;          const pcre_uchar *ecode;
2426
2427          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2428          can set isinclass from it. */          can set isinclass from it. */
2429
2430          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2431            {            {
2432            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2433            if (clen > 0)            if (clen > 0)
2434              {              {
2435              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2436                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2437              }              }
2438            }            }
2439
# Line 2361  for (;;) Line 2444  for (;;)
2444          else          else
2445           {           {
2446           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2447           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2448           }           }
2449
2450          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
# Line 2395  for (;;) Line 2478  for (;;)
2478            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2479            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2480            if (count >= GET2(ecode, 1))            if (count >= GET2(ecode, 1))
2481              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2482            if (isinclass)            if (isinclass)
2483              {              {
2484              int max = GET2(ecode, 3);              int max = GET2(ecode, 1 + IMM2_SIZE);
2485              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2486                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2487              else              else
2489              }              }
# Line 2431  for (;;) Line 2514  for (;;)
2514          int rc;          int rc;
2515          int local_offsets[2];          int local_offsets[2];
2516          int local_workspace[1000];          int local_workspace[1000];
2517          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2518
2519          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2520
# Line 2444  for (;;) Line 2527  for (;;)
2527            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2528            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2529            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2530            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2531
2532          if (rc == PCRE_ERROR_DFA_UITEM) return rc;          if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2533          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
# Line 2470  for (;;) Line 2551  for (;;)
2552            {            {
2553            rrc = 0;            rrc = 0;
2554            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2555              {              {
2556              pcre_callout_block cb;              PUBL(callout_block) cb;
2557              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2559              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
# Line 2485  for (;;) Line 2566  for (;;)
2566              cb.capture_top      = 1;              cb.capture_top      = 1;
2567              cb.capture_last     = -1;              cb.capture_last     = -1;
2568              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2569              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              cb.mark             = NULL;   /* No (*MARK) support */
2570                if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2571              }              }
2572            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2573            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2574            }            }
2575
# Line 2509  for (;;) Line 2591  for (;;)
2591
2592          else if (condcode == OP_RREF || condcode == OP_NRREF)          else if (condcode == OP_RREF || condcode == OP_NRREF)
2593            {            {
2594            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2595            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2596            if (recursing > 0)            if (md->recursive != NULL)
2599            }            }
2600
# Line 2521  for (;;) Line 2603  for (;;)
2603          else          else
2604            {            {
2605            int rc;            int rc;
2606            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2607            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2608
2609            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2610
# Line 2535  for (;;) Line 2617  for (;;)
2617              sizeof(local_offsets)/sizeof(int),    /* size of same */              sizeof(local_offsets)/sizeof(int),    /* size of same */
2618              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2619              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2620              ims,                                  /* the current ims flags */              rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2621
2622            if (rc == PCRE_ERROR_DFA_UITEM) return rc;            if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2623            if ((rc >= 0) ==            if ((rc >= 0) ==
# Line 2552  for (;;) Line 2632  for (;;)
2632        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2633        case OP_RECURSE:        case OP_RECURSE:
2634          {          {
2635            dfa_recursion_info *ri;
2636          int local_offsets[1000];          int local_offsets[1000];
2637          int local_workspace[1000];          int local_workspace[1000];
2638            const pcre_uchar *callpat = start_code + GET(code, 1);
2639            int recno = (callpat == md->start_code)? 0 :
2641          int rc;          int rc;
2642
2643          DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2644            recursing + 1));
2645            /* Check for repeating a recursion without advancing the subject
2646            pointer. This should catch convoluted mutual recursions. (Some simple
2647            cases are caught at compile time.) */
2648
2649            for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2650              if (recno == ri->group_num && ptr == ri->subject_position)
2651                return PCRE_ERROR_RECURSELOOP;
2652
2653            /* Remember this recursion and where we started it so as to
2654            catch infinite loops. */
2655
2656            new_recursive.group_num = recno;
2657            new_recursive.subject_position = ptr;
2658            new_recursive.prevrec = md->recursive;
2659            md->recursive = &new_recursive;
2660
2661          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2662            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2663            start_code + GET(code, 1),            /* this subexpression's code */            callpat,                              /* this subexpression's code */
2664            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2665            (int)(ptr - start_subject),           /* start offset */            (int)(ptr - start_subject),           /* start offset */
2666            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2667            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2668            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2669            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2670            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing + 1);                       /* regex recurse level */
2671
2672          DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2673            recursing + 1, rc));
2674            DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2675              rc));
2676
2677          /* Ran out of internal offsets */          /* Ran out of internal offsets */
2678
# Line 2587  for (;;) Line 2686  for (;;)
2686            {            {
2687            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2688              {              {
2689              const uschar *p = start_subject + local_offsets[rc];              const pcre_uchar *p = start_subject + local_offsets[rc];
2690              const uschar *pp = start_subject + local_offsets[rc+1];              const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2691              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2692              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2693                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2694    #endif
2695              if (charcount > 0)              if (charcount > 0)
2696                {                {
# Line 2606  for (;;) Line 2707  for (;;)
2707        break;        break;
2708
2709        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2710          case OP_BRAPOS:
2711          case OP_SBRAPOS:
2712          case OP_CBRAPOS:
2713          case OP_SCBRAPOS:
2714          case OP_BRAPOSZERO:
2715            {
2716            int charcount, matched_count;
2717            const pcre_uchar *local_ptr = ptr;
2718            BOOL allow_zero;
2719
2720            if (codevalue == OP_BRAPOSZERO)
2721              {
2722              allow_zero = TRUE;
2723              codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2724              }
2725            else allow_zero = FALSE;
2726
2727            /* Loop to match the subpattern as many times as possible as if it were
2728            a complete pattern. */
2729
2730            for (matched_count = 0;; matched_count++)
2731              {
2732              int local_offsets[2];
2733              int local_workspace[1000];
2734
2735              int rc = internal_dfa_exec(
2736                md,                                   /* fixed match data */
2737                code,                                 /* this subexpression's code */
2738                local_ptr,                            /* where we currently are */
2739                (int)(ptr - start_subject),           /* start offset */
2740                local_offsets,                        /* offset vector */
2741                sizeof(local_offsets)/sizeof(int),    /* size of same */
2742                local_workspace,                      /* workspace vector */
2743                sizeof(local_workspace)/sizeof(int),  /* size of same */
2744                rlevel);                              /* function recursion level */
2745
2746              /* Failed to match */
2747
2748              if (rc < 0)
2749                {
2750                if (rc != PCRE_ERROR_NOMATCH) return rc;
2751                break;
2752                }
2753
2754              /* Matched: break the loop if zero characters matched. */
2755
2756              charcount = local_offsets[1] - local_offsets[0];
2757              if (charcount == 0) break;
2758              local_ptr += charcount;    /* Advance temporary position ptr */
2759              }
2760
2761            /* At this point we have matched the subpattern matched_count
2762            times, and local_ptr is pointing to the character after the end of the
2763            last match. */
2764
2765            if (matched_count > 0 || allow_zero)
2766              {
2767              const pcre_uchar *end_subpattern = code;
2768              int next_state_offset;
2769
2770              do { end_subpattern += GET(end_subpattern, 1); }
2771                while (*end_subpattern == OP_ALT);
2772              next_state_offset =
2773                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2774
2775              /* Optimization: if there are no more active states, and there
2776              are no new states yet set up, then skip over the subject string
2777              right here, to save looping. Otherwise, set up the new state to swing
2778              into action when the end of the matched substring is reached. */
2779
2780              if (i + 1 >= active_count && new_count == 0)
2781                {
2782                ptr = local_ptr;
2783                clen = 0;
2785                }
2786              else
2787                {
2788                const pcre_uchar *p = ptr;
2789                const pcre_uchar *pp = local_ptr;
2790                charcount = (int)(pp - p);
2791    #ifdef SUPPORT_UTF
2792                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2793    #endif
2794                ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2795                }
2796              }
2797            }
2798          break;
2799
2800          /*-----------------------------------------------------------------*/
2801        case OP_ONCE:        case OP_ONCE:
2802          case OP_ONCE_NC:
2803          {          {
2804          int local_offsets[2];          int local_offsets[2];
2805          int local_workspace[1000];          int local_workspace[1000];
# Line 2620  for (;;) Line 2813  for (;;)
2813            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2814            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2815            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2816            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
rlevel,                               /* function recursion level */
recursing);                           /* pass on regex recursion */
2817
2818          if (rc >= 0)          if (rc >= 0)
2819            {            {
2820            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2821            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2822            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2823
# Line 2656  for (;;) Line 2847  for (;;)
2847            /* Optimization: if there are no more active states, and there            /* Optimization: if there are no more active states, and there
2848            are no new states yet set up, then skip over the subject string            are no new states yet set up, then skip over the subject string
2849            right here, to save looping. Otherwise, set up the new state to swing            right here, to save looping. Otherwise, set up the new state to swing
2850            into action when the end of the substring is reached. */            into action when the end of the matched substring is reached. */
2851
2852            else if (i + 1 >= active_count && new_count == 0)            else if (i + 1 >= active_count && new_count == 0)
2853              {              {
# Line 2679  for (;;) Line 2870  for (;;)
2870              }              }
2871            else            else
2872              {              {
2873              const uschar *p = start_subject + local_offsets[0];  #ifdef SUPPORT_UTF
2874              const uschar *pp = start_subject + local_offsets[1];              const pcre_uchar *p = start_subject + local_offsets[0];
2875              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;              const pcre_uchar *pp = start_subject + local_offsets[1];
2876                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2877    #endif
2879              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
2880                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
2881              }              }

2882            }            }
2883          else if (rc != PCRE_ERROR_NOMATCH) return rc;          else if (rc != PCRE_ERROR_NOMATCH) return rc;
2884          }          }
# Line 2698  for (;;) Line 2890  for (;;)
2890
2891        case OP_CALLOUT:        case OP_CALLOUT:
2892        rrc = 0;        rrc = 0;
2893        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
2894          {          {
2895          pcre_callout_block cb;          PUBL(callout_block) cb;
2896          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2897          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2898          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
# Line 2713  for (;;) Line 2905  for (;;)
2905          cb.capture_top      = 1;          cb.capture_top      = 1;
2906          cb.capture_last     = -1;          cb.capture_last     = -1;
2907          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2908          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          cb.mark             = NULL;   /* No (*MARK) support */
2909            if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2910          }          }
2911        if (rrc == 0)        if (rrc == 0)
2912          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
2913        break;        break;
2914
2915
# Line 2813  Returns:          > 0 => number of match Line 3006  Returns:          > 0 => number of match
3006                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3007  */  */
3008
3009    #ifdef COMPILE_PCRE8
3010  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3011  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3012    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3013    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3014    #else
3015    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3016    pcre16_dfa_exec(const pcre *argument_re, const pcre16_extra *extra_data,
3017      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3018      int offsetcount, int *workspace, int wscount)
3019    #endif
3020  {  {
3021  real_pcre *re = (real_pcre *)argument_re;  real_pcre *re = (real_pcre *)argument_re;
3022  dfa_match_data match_block;  dfa_match_data match_block;
3023  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3024  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3025  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;
3026    const pcre_uint8 *lcc;
3027
pcre_study_data internal_study;
3028  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
real_pcre internal_re;
3029
3030  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3031  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3032  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3033  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3034  int first_byte = -1;  pcre_uchar first_char = 0;
3035  int req_byte = -1;  pcre_uchar first_char2 = 0;
3036  int req_byte2 = -1;  pcre_uchar req_char = 0;
3037    pcre_uchar req_char2 = 0;
3038  int newline;  int newline;
3039
3040  /* Plausibility checks */  /* Plausibility checks */
# Line 2844  if (re == NULL || subject == NULL || wor Line 3044  if (re == NULL || subject == NULL || wor
3044     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3045  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3046  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3047    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3048
3049  /* We need to find the pointer to any study data before we test for byte  /* We need to find the pointer to any study data before we test for byte
3050  flipping, so we scan the extra_data block first. This may set two fields in the  flipping, so we scan the extra_data block first. This may set two fields in the
# Line 2868  if (extra_data != NULL) Line 3069  if (extra_data != NULL)
3069    }    }
3070
3071  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
3072  test for a regex that was compiled on a host of opposite endianness. If this is  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3073  the case, flipped values are put in internal_re and internal_study if there was  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3074  study data too. */  means that the pattern is likely compiled with different endianness. */
3075
3076  if (re->magic_number != MAGIC_NUMBER)  if (re->magic_number != MAGIC_NUMBER)
3077    {    return re->magic_number == REVERSED_MAGIC_NUMBER?
3079    if (re == NULL) return PCRE_ERROR_BADMAGIC;  if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
if (study != NULL) study = &internal_study;
}
3080
3081  /* Set some local values */  /* Set some local values */
3082
3083  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3084  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3085  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3086
3087  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3088  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3089    utf = (re->options & PCRE_UTF8) != 0;
3090  #else  #else
3091  utf8 = FALSE;  utf = FALSE;
3092  #endif  #endif
3093
3094  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 2896  anchored = (options & (PCRE_ANCHORED|PCR Line 3096  anchored = (options & (PCRE_ANCHORED|PCR
3096
3097  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3098
3099  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3100      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3101  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3102  md->end_subject = end_subject;  md->end_subject = end_subject;
3103  md->start_offset = start_offset;  md->start_offset = start_offset;
3104  md->moptions = options;  md->moptions = options;
# Line 2959  else Line 3159  else
3159  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3160  back the character offset. */  back the character offset. */
3161
3162  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3163  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3164    {    {
3165    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    int erroroffset;
3166      return PCRE_ERROR_BADUTF8;    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3167    if (start_offset > 0 && start_offset < length)    if (errorcode != 0)
3168      {      {
3169      int tb = ((uschar *)subject)[start_offset];      if (offsetcount >= 2)
if (tb > 127)
3170        {        {
3171        tb &= 0xc0;        offsets[0] = erroroffset;
3172        if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;        offsets[1] = errorcode;
3173        }        }
3174        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
3176      }      }
3177      if (start_offset > 0 && start_offset < length &&
3178            NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3180    }    }
3181  #endif  #endif
3182
# Line 2980  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3184  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3184  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3185  in other programs later. */  in other programs later. */
3186
3187  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3188
3189  /* The lower casing table and the "must be at the start of a line" flag are  /* The lower casing table and the "must be at the start of a line" flag are
3190  used in a loop when finding where to start. */  used in a loop when finding where to start. */
# Line 2999  if (!anchored) Line 3203  if (!anchored)
3203    {    {
3204    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3205      {      {
3206      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3207      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = re->first_char;
3208        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3209          {
3210          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3211    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3212          if (utf && first_char > 127)
3213            first_char2 = UCD_OTHERCASE(first_char);
3214    #endif
3215          }
3216      }      }
3217    else    else
3218      {      {
# Line 3016  character" set. */ Line 3227  character" set. */
3227
3228  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3229    {    {
3230    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3231    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = re->req_char;
3232    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3233        {
3234        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3235    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3236        if (utf && req_char > 127)
3237          req_char2 = UCD_OTHERCASE(req_char);
3238    #endif
3239        }
3240    }    }
3241
3242  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 3031  for (;;) Line 3249  for (;;)
3249
3250    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3251      {      {
3252      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3253
3254      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3255      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 3040  for (;;) Line 3258  for (;;)
3258
3259      if (firstline)      if (firstline)
3260        {        {
3261        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3262  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3263        if (utf8)        if (utf)
3264          {          {
3265          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3266            {            {
3267            t++;            t++;
3268            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3269            }            }
3270          }          }
3271        else        else
# Line 3058  for (;;) Line 3276  for (;;)
3276
3277      /* There are some optimizations that avoid running the match if a known      /* There are some optimizations that avoid running the match if a known
3278      starting point is not found. However, there is an option that disables      starting point is not found. However, there is an option that disables
3279      these, for testing and for ensuring that all callouts do actually occur. */      these, for testing and for ensuring that all callouts do actually occur.
3280        The option can be set in the regex by (*NO_START_OPT) or passed in
3281        match-time options. */
3282
3283      if ((options & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3284        {        {
3285        /* Advance to a known first byte. */        /* Advance to a known first char. */
3286
3287        if (first_byte >= 0)        if (has_first_char)
3288          {          {
3289          if (first_byte_caseless)          if (first_char != first_char2)
3290            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3291                   lcc[*current_subject] != first_byte)                *current_subject != first_char && *current_subject != first_char2)
3292              current_subject++;              current_subject++;
3293          else          else
3294            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3295                   *current_subject != first_byte)                   *current_subject != first_char)
3296              current_subject++;              current_subject++;
3297          }          }
3298
# Line 3082  for (;;) Line 3302  for (;;)
3302          {          {
3303          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3304            {            {
3305  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3306            if (utf8)            if (utf)
3307              {              {
3308              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3309                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3310                {                {
3311                current_subject++;                current_subject++;
3312                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3313                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
current_subject++;
3314                }                }
3315              }              }
3316            else            else
# Line 3118  for (;;) Line 3337  for (;;)
3337          while (current_subject < end_subject)          while (current_subject < end_subject)
3338            {            {
3339            register unsigned int c = *current_subject;            register unsigned int c = *current_subject;
3340    #ifndef COMPILE_PCRE8
3341              if (c > 255) c = 255;
3342    #endif
3343            if ((start_bits[c/8] & (1 << (c&7))) == 0)            if ((start_bits[c/8] & (1 << (c&7))) == 0)
3344              {              {
3345              current_subject++;              current_subject++;
3346  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3347              if (utf8)              /* In non 8-bit mode, the iteration will stop for
3348                while(current_subject < end_subject &&              characters > 255 at the beginning or not stop at all. */
3349                      (*current_subject & 0xc0) == 0x80) current_subject++;              if (utf)
3350                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3351                    current_subject++);
3352  #endif  #endif
3353              }              }
3354            else break;            else break;
# Line 3140  for (;;) Line 3364  for (;;)
3364      disabling is explicitly requested (and of course, by the test above, this      disabling is explicitly requested (and of course, by the test above, this
3365      code is not obeyed when restarting after a partial match). */      code is not obeyed when restarting after a partial match). */
3366
3367      if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3368          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3369        {        {
3370        /* If the pattern was studied, a minimum subject length may be set. This        /* If the pattern was studied, a minimum subject length may be set. This
# Line 3152  for (;;) Line 3376  for (;;)
3376            (pcre_uint32)(end_subject - current_subject) < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3377          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3378
3379        /* If req_byte is set, we know that that character must appear in the        /* If req_char is set, we know that that character must appear in the
3380        subject for the match to succeed. If the first character is set, req_byte        subject for the match to succeed. If the first character is set, req_char
3381        must be later in the subject; otherwise the test starts at the match        must be later in the subject; otherwise the test starts at the match
3382        point. This optimization can save a huge amount of work in patterns with        point. This optimization can save a huge amount of work in patterns with
3383        nested unlimited repeats that aren't going to match. Writing separate        nested unlimited repeats that aren't going to match. Writing separate
# Line 3165  for (;;) Line 3389  for (;;)
3389        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3390        string... so we don't do this when the string is sufficiently long. */        string... so we don't do this when the string is sufficiently long. */
3391
3392        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3393          {          {
3394          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3395
3396          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3397          place we found it at last time. */          place we found it at last time. */
3398
3399          if (p > req_byte_ptr)          if (p > req_char_ptr)
3400            {            {
3401            if (req_byte_caseless)            if (req_char != req_char2)
3402              {              {
3403              while (p < end_subject)              while (p < end_subject)
3404                {                {
3405                register int pp = *p++;                register int pp = *p++;
3406                if (pp == req_byte || pp == req_byte2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3407                }                }
3408              }              }
3409            else            else
3410              {              {
3411              while (p < end_subject)              while (p < end_subject)
3412                {                {
3413                if (*p++ == req_byte) { p--; break; }                if (*p++ == req_char) { p--; break; }
3414                }                }
3415              }              }
3416
# Line 3199  for (;;) Line 3423  for (;;)
3423            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3424            the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3425
3426            req_byte_ptr = p;            req_char_ptr = p;
3427            }            }
3428          }          }
3429        }        }
# Line 3208  for (;;) Line 3432  for (;;)
3432    /* OK, now we can do the business */    /* OK, now we can do the business */
3433
3434    md->start_used_ptr = current_subject;    md->start_used_ptr = current_subject;
3435      md->recursive = NULL;
3436
3437    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3438      md,                                /* fixed match data */      md,                                /* fixed match data */
# Line 3218  for (;;) Line 3443  for (;;)
3443      offsetcount,                       /* size of same */      offsetcount,                       /* size of same */
3444      workspace,                         /* workspace vector */      workspace,                         /* workspace vector */
3445      wscount,                           /* size of same */      wscount,                           /* size of same */
3446      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      0);                                /* function recurse level */
0,                                 /* function recurse level */
0);                                /* regex recurse level */
3447
3448    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3449    on only if not anchored. */    on only if not anchored. */
# Line 3232  for (;;) Line 3455  for (;;)
3455
3456    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3457    current_subject++;    current_subject++;
3458    if (utf8)  #ifdef SUPPORT_UTF
3459      if (utf)
3460      {      {
3461      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3462        current_subject++;        current_subject++);
3463      }      }
3464    #endif
3465    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3466
3467    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does

Legend:
 Removed from v.553 changed lines Added in v.850