# Diff of /code/trunk/pcre_dfa_exec.c

revision 642 by ph10, Thu Jul 28 18:59:40 2011 UTC revision 1425 by ph10, Tue Dec 31 17:44:40 2013 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
11
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40

41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45
46
# Line 113  small value. Non-zero values in the tabl Line 112  small value. Non-zero values in the tabl
112  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
113  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
114
115  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
116    0,                             /* End                                    */    0,                             /* End                                    */
117    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
118    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 121  static const uschar coptable[] = { Line 120  static const uschar coptable[] = {
120    0, 0,                          /* \P, \p                                 */    0, 0,                          /* \P, \p                                 */
121    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
122    0,                             /* \X                                     */    0,                             /* \X                                     */
123    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */    0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
124    1,                             /* Char                                   */    1,                             /* Char                                   */
125    1,                             /* Chari                                  */    1,                             /* Chari                                  */
126    1,                             /* not                                    */    1,                             /* not                                    */
127    1,                             /* noti                                   */    1,                             /* noti                                   */
128    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
129    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
130    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
131    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
132      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
133    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
134    3, 3, 3,                       /* upto I, minupto I, exact I             */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
135    1, 1, 1, 3,                    /* *+I, ++I, ?+I, upto+I                  */    1+IMM2_SIZE,                   /* exact I                                */
136      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
137    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
138    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
139    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
140    1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
141      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
142    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
143    3, 3, 3,                       /* NOT upto I, minupto I, exact I         */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
144    1, 1, 1, 3,                    /* NOT *+I, ++I, ?+I, upto+I              */    1+IMM2_SIZE,                   /* NOT exact I                            */
145      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
146    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
147    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
148    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
149    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
150      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
151    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
152    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
153    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
154      0, 0, 0, 0,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
155    0,                             /* CLASS                                  */    0,                             /* CLASS                                  */
156    0,                             /* NCLASS                                 */    0,                             /* NCLASS                                 */
157    0,                             /* XCLASS - variable length               */    0,                             /* XCLASS - variable length               */
158    0,                             /* REF                                    */    0,                             /* REF                                    */
159    0,                             /* REFI                                   */    0,                             /* REFI                                   */
160      0,                             /* DNREF                                  */
161      0,                             /* DNREFI                                 */
162    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
163    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
164    0,                             /* Alt                                    */    0,                             /* Alt                                    */
# Line 164  static const uschar coptable[] = { Line 171  static const uschar coptable[] = {
171    0,                             /* Assert not                             */    0,                             /* Assert not                             */
172    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
173    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
174    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */    0, 0,                          /* ONCE, ONCE_NC                          */
175      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
176    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
177    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, DNCREF                           */
178    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, DNRREF                           */
179    0,                             /* DEF                                    */    0,                             /* DEF                                    */
180    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
181    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
# Line 181  remember the fact that a character could Line 189  remember the fact that a character could
189  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
190  two tables that follow must also be modified. */  two tables that follow must also be modified. */
191
192  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
193    0,                             /* End                                    */    0,                             /* End                                    */
194    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
195    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
# Line 189  static const uschar poptable[] = { Line 197  static const uschar poptable[] = {
197    1, 1,                          /* \P, \p                                 */    1, 1,                          /* \P, \p                                 */
198    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
199    1,                             /* \X                                     */    1,                             /* \X                                     */
200    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */    0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
201    1,                             /* Char                                   */    1,                             /* Char                                   */
202    1,                             /* Chari                                  */    1,                             /* Chari                                  */
203    1,                             /* not                                    */    1,                             /* not                                    */
# Line 215  static const uschar poptable[] = { Line 223  static const uschar poptable[] = {
223    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
224    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
225    1, 1,                          /* CRRANGE, CRMINRANGE                    */    1, 1,                          /* CRRANGE, CRMINRANGE                    */
226      1, 1, 1, 1,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
227    1,                             /* CLASS                                  */    1,                             /* CLASS                                  */
228    1,                             /* NCLASS                                 */    1,                             /* NCLASS                                 */
229    1,                             /* XCLASS - variable length               */    1,                             /* XCLASS - variable length               */
230    0,                             /* REF                                    */    0,                             /* REF                                    */
231    0,                             /* REFI                                   */    0,                             /* REFI                                   */
232      0,                             /* DNREF                                  */
233      0,                             /* DNREFI                                 */
234    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
235    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
236    0,                             /* Alt                                    */    0,                             /* Alt                                    */
# Line 232  static const uschar poptable[] = { Line 243  static const uschar poptable[] = {
243    0,                             /* Assert not                             */    0,                             /* Assert not                             */
244    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
245    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
246    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */    0, 0,                          /* ONCE, ONCE_NC                          */
247      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
248    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
249    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, DNCREF                           */
250    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, DNRREF                           */
251    0,                             /* DEF                                    */    0,                             /* DEF                                    */
252    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
253    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
# Line 247  static const uschar poptable[] = { Line 259  static const uschar poptable[] = {
259  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
260  and \w */  and \w */
261
262  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
263    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
264    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
265    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 255  static const uschar toptable1[] = { Line 267  static const uschar toptable1[] = {
267    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
268  };  };
269
270  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
271    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
272    ctype_digit, 0,    ctype_digit, 0,
273    ctype_space, 0,    ctype_space, 0,
# Line 275  typedef struct stateblock { Line 287  typedef struct stateblock {
287    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
288  } stateblock;  } stateblock;
289
290  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
291
292
293  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 294  Returns:       nothing Line 306  Returns:       nothing
306  */  */
307
308  static void  static void
309  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
310  {  {
311  int c;  pcre_uint32 c;
312  while (length-- > 0)  while (length-- > 0)
313    {    {
314    if (isprint(c = *(p++)))    if (isprint(c = *(p++)))
315      fprintf(f, "%c", c);      fprintf(f, "%c", c);
316    else    else
317      fprintf(f, "\\x%02x", c);      fprintf(f, "\\x{%02x}", c);
318    }    }
319  }  }
320  #endif  #endif
# Line 375  for the current character, one for the f Line 387  for the current character, one for the f
387      next_new_state->count  = (y); \      next_new_state->count  = (y); \
388      next_new_state->data   = (z); \      next_new_state->data   = (z); \
389      next_new_state++; \      next_new_state++; \
390      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
391          (x), (y), (z), __LINE__)); \
392      } \      } \
393    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
394
# Line 384  for the current character, one for the f Line 397  for the current character, one for the f
397  static int  static int
398  internal_dfa_exec(  internal_dfa_exec(
399    dfa_match_data *md,    dfa_match_data *md,
400    const uschar *this_start_code,    const pcre_uchar *this_start_code,
401    const uschar *current_subject,    const pcre_uchar *current_subject,
402    int start_offset,    int start_offset,
403    int *offsets,    int *offsets,
404    int offsetcount,    int offsetcount,
# Line 396  internal_dfa_exec( Line 409  internal_dfa_exec(
409  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
410  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
411
412  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
413  const uschar *ptr;  const pcre_uchar *ptr;
414  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
415
416  dfa_recursion_info new_recursive;  dfa_recursion_info new_recursive;
417
# Line 407  int active_count, new_count, match_count Line 420  int active_count, new_count, match_count
420  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
421  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
422
423  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
424  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
425  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
426
427  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
428  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
429  #else  #else
430  BOOL utf8 = FALSE;  BOOL utf = FALSE;
431  #endif  #endif
432
433    BOOL reset_could_continue = FALSE;
434
435  rlevel++;  rlevel++;
436  offsetcount &= (-2);  offsetcount &= (-2);
437
# Line 440  new_count = 0; Line 455  new_count = 0;
455
456  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
457    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
458      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0);      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
459        ? IMM2_SIZE:0);
460
461  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
462  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 468  if (*first_op == OP_REVERSE) Line 484  if (*first_op == OP_REVERSE)
484    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
485    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
486
487  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
488    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
489
490    if (utf8)    if (utf)
491      {      {
492      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
493        {        {
494        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
495        current_subject--;        current_subject--;
496        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
(*current_subject & 0xc0) == 0x80)
current_subject--;
497        }        }
498      }      }
499    else    else
# Line 540  else Line 554  else
554      {      {
555      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
556        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
557          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
558          2:0);          ? IMM2_SIZE:0);
559      do      do
560        {        {
561        ADD_NEW((int)(end_code - start_code + length), 0);        ADD_NEW((int)(end_code - start_code + length), 0);
# Line 554  else Line 568  else
568
569  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
570
571  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
572
573  /* Loop for scanning the subject */  /* Loop for scanning the subject */
574
# Line 563  for (;;) Line 577  for (;;)
577    {    {
578    int i, j;    int i, j;
579    int clen, dlen;    int clen, dlen;
580    unsigned int c, d;    pcre_uint32 c, d;
581    int forced_fail = 0;    int forced_fail = 0;
582    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
583      BOOL could_continue = reset_could_continue;
584      reset_could_continue = FALSE;
585
586    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
587    new state list. */    new state list. */
# Line 581  for (;;) Line 597  for (;;)
597
598  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
599    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
600    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
601    printf("\"\n");    printf("\"\n");
602
603    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 601  for (;;) Line 617  for (;;)
617
618    if (ptr < end_subject)    if (ptr < end_subject)
619      {      {
620      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
621  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
622      if (utf8) { GETCHARLEN(c, ptr, clen); } else      GETCHARLENTEST(c, ptr, clen);
623  #endif  /* SUPPORT_UTF8 */  #else
624      c = *ptr;      c = *ptr;
625    #endif  /* SUPPORT_UTF */
626      }      }
627    else    else
628      {      {
# Line 621  for (;;) Line 638  for (;;)
638    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
639      {      {
640      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
641      BOOL caseless = FALSE;      BOOL caseless = FALSE;
642      const uschar *code;      const pcre_uchar *code;
643      int state_offset = current_state->offset;      int state_offset = current_state->offset;
644      int count, codevalue, rrc;      int codevalue, rrc;
645        int count;
646
647  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
648      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 635  for (;;) Line 653  for (;;)
653
654      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
655      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
656      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
657        state, arrange for it to be passed on. */
658
659      if (state_offset < 0)      if (state_offset < 0)
660        {        {
# Line 644  for (;;) Line 663  for (;;)
663          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
665            current_state->data - 1);            current_state->data - 1);
666            if (could_continue) reset_could_continue = TRUE;
667          continue;          continue;
668          }          }
669        else        else
# Line 683  for (;;) Line 703  for (;;)
703      permitted.      permitted.
704
705      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
706      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
707      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
708      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
709      */      these ones to new opcodes. */
710
711      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
712        {        {
713        dlen = 1;        dlen = 1;
714  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
715        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
716  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
717        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
718        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
719          {          {
# Line 738  for (;;) Line 758  for (;;)
758
759  /* ========================================================================== */  /* ========================================================================== */
760        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
761        on with the next opcode. For repeating opcodes, also add the repeat        on with the next opcode. For repeating opcodes, also add the repeat
762        state. Note that KETRPOS will always be encountered at the end of the        state. Note that KETRPOS will always be encountered at the end of the
763        subpattern, because the possessive subpattern repeats are always handled        subpattern, because the possessive subpattern repeats are always handled
764        using recursive calls. Thus, it never adds any new states.        using recursive calls. Thus, it never adds any new states.
765
766        At the end of the (sub)pattern, unless we have an empty string and        At the end of the (sub)pattern, unless we have an empty string and
767        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
768        start of the subject, save the match data, shifting up all previous        start of the subject, save the match data, shifting up all previous
# Line 751  for (;;) Line 771  for (;;)
771        case OP_KET:        case OP_KET:
772        case OP_KETRMIN:        case OP_KETRMIN:
773        case OP_KETRMAX:        case OP_KETRMAX:
774        case OP_KETRPOS:        case OP_KETRPOS:
775        if (code != end_code)        if (code != end_code)
776          {          {
# Line 768  for (;;) Line 788  for (;;)
788                  current_subject > start_subject + md->start_offset)))                  current_subject > start_subject + md->start_offset)))
789            {            {
790            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
791              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
792                match_count = 0;                match_count = 0;
793            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
794            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
# Line 777  for (;;) Line 797  for (;;)
797              offsets[0] = (int)(current_subject - start_subject);              offsets[0] = (int)(current_subject - start_subject);
798              offsets[1] = (int)(ptr - start_subject);              offsets[1] = (int)(ptr - start_subject);
799              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
800                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
801              }              }
802            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
803              {              {
# Line 814  for (;;) Line 834  for (;;)
834        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
835        case OP_CBRA:        case OP_CBRA:
836        case OP_SCBRA:        case OP_SCBRA:
838        code += GET(code, 1);        code += GET(code, 1);
839        while (*code == OP_ALT)        while (*code == OP_ALT)
840          {          {
# Line 882  for (;;) Line 902  for (;;)
902        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
903        case OP_ANY:        case OP_ANY:
904        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
905          { ADD_NEW(state_offset + 1, 0); }          {
906            if (ptr + 1 >= md->end_subject &&
907                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
908                NLBLOCK->nltype == NLTYPE_FIXED &&
909                NLBLOCK->nllen == 2 &&
910                c == NLBLOCK->nl[0])
911              {
912              could_continue = partial_newline = TRUE;
913              }
914            else
915              {
917              }
918            }
919        break;        break;
920
921        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 910  for (;;) Line 943  for (;;)
943                 (ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
944              ))              ))
945            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
946            else if (ptr + 1 >= md->end_subject &&
947                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
948                     NLBLOCK->nltype == NLTYPE_FIXED &&
949                     NLBLOCK->nllen == 2 &&
950                     c == NLBLOCK->nl[0])
951              {
952              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
953                {
954                reset_could_continue = TRUE;
955                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
956                }
957              else could_continue = partial_newline = TRUE;
958              }
959          }          }
960        break;        break;
961
# Line 922  for (;;) Line 968  for (;;)
968          else if (clen == 0 ||          else if (clen == 0 ||
969              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
970            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
971            else if (ptr + 1 >= md->end_subject &&
972                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
973                     NLBLOCK->nltype == NLTYPE_FIXED &&
974                     NLBLOCK->nllen == 2 &&
975                     c == NLBLOCK->nl[0])
976              {
977              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
978                {
979                reset_could_continue = TRUE;
980                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
981                }
982              else could_continue = partial_newline = TRUE;
983              }
984          }          }
985        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
986          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 954  for (;;) Line 1013  for (;;)
1013
1014          if (ptr > start_subject)          if (ptr > start_subject)
1015            {            {
1016            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1017            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1018  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1019            if (utf8) BACKCHAR(temp);            if (utf) { BACKCHAR(temp); }
1020  #endif  #endif
1021            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
1022  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1009  for (;;) Line 1068  for (;;)
1068        if (clen > 0)        if (clen > 0)
1069          {          {
1070          BOOL OK;          BOOL OK;
1071            const pcre_uint32 *cp;
1072          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1073          switch(code[1])          switch(code[1])
1074            {            {
# Line 1022  for (;;) Line 1082  for (;;)
1082            break;            break;
1083
1084            case PT_GC:            case PT_GC:
1085            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1086            break;            break;
1087
1088            case PT_PC:            case PT_PC:
# Line 1036  for (;;) Line 1096  for (;;)
1096            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1097
1098            case PT_ALNUM:            case PT_ALNUM:
1099            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1100                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1101            break;            break;
1102
1103            case PT_SPACE:    /* Perl space */            /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1104            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            which means that Perl space and POSIX space are now identical. PCRE
1105                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;            was changed at release 8.34. */
break;
1106
1107              case PT_SPACE:    /* Perl space */
1108            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1109            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            switch(c)
1110                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||              {
1111                 c == CHAR_FF || c == CHAR_CR;              HSPACE_CASES:
1112                VSPACE_CASES:
1113                OK = TRUE;
1114                break;
1115
1116                default:
1117                OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1118                break;
1119                }
1120            break;            break;
1121
1122            case PT_WORD:            case PT_WORD:
1123            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1124                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1125                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1126            break;            break;
1127
1128              case PT_CLIST:
1129              cp = PRIV(ucd_caseless_sets) + code[2];
1130              for (;;)
1131                {
1132                if (c < *cp) { OK = FALSE; break; }
1133                if (c == *cp++) { OK = TRUE; break; }
1134                }
1135              break;
1136
1137              case PT_UCNC:
1138              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1139                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1140                   c >= 0xe000;
1141              break;
1142
1143            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1144
1145            default:            default:
# Line 1084  for (;;) Line 1167  for (;;)
1167        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1168        if (clen > 0)        if (clen > 0)
1169          {          {
1170          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1171                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1172                NLBLOCK->nltype == NLTYPE_FIXED &&
1173                NLBLOCK->nllen == 2 &&
1174                c == NLBLOCK->nl[0])
1175              {
1176              could_continue = partial_newline = TRUE;
1177              }
1178            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1179              (c < 256 &&              (c < 256 &&
1180                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1181                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1107  for (;;) Line 1198  for (;;)
1199        if (clen > 0)        if (clen > 0)
1200          {          {
1201          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1202                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1203                NLBLOCK->nltype == NLTYPE_FIXED &&
1204                NLBLOCK->nllen == 2 &&
1205                c == NLBLOCK->nl[0])
1206              {
1207              could_continue = partial_newline = TRUE;
1208              }
1209            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1210              (c < 256 &&              (c < 256 &&
1211                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1212                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1129  for (;;) Line 1228  for (;;)
1229        if (clen > 0)        if (clen > 0)
1230          {          {
1231          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1232                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1233                NLBLOCK->nltype == NLTYPE_FIXED &&
1234                NLBLOCK->nllen == 2 &&
1235                c == NLBLOCK->nl[0])
1236              {
1237              could_continue = partial_newline = TRUE;
1238              }
1239            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1240              (c < 256 &&              (c < 256 &&
1241                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1242                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1149  for (;;) Line 1256  for (;;)
1256        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1257        if (clen > 0)        if (clen > 0)
1258          {          {
1259          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1260                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1261                NLBLOCK->nltype == NLTYPE_FIXED &&
1262                NLBLOCK->nllen == 2 &&
1263                c == NLBLOCK->nl[0])
1264              {
1265              could_continue = partial_newline = TRUE;
1266              }
1267            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1268              (c < 256 &&              (c < 256 &&
1269                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1270                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1271            {            {
1272            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1273              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1274            else            else
1276            }            }
# Line 1166  for (;;) Line 1281  for (;;)
1281        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1282        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1283        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1285        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1286        if (clen > 0)        if (clen > 0)
1287          {          {
1288          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1289                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1290                NLBLOCK->nltype == NLTYPE_FIXED &&
1291                NLBLOCK->nllen == 2 &&
1292                c == NLBLOCK->nl[0])
1293              {
1294              could_continue = partial_newline = TRUE;
1295              }
1296            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1297              (c < 256 &&              (c < 256 &&
1298                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1299                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1180  for (;;) Line 1303  for (;;)
1303              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1304              next_active_state--;              next_active_state--;
1305              }              }
1306            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1307              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1308            else            else
1310            }            }
# Line 1203  for (;;) Line 1326  for (;;)
1326        if (clen > 0)        if (clen > 0)
1327          {          {
1328          BOOL OK;          BOOL OK;
1329            const pcre_uint32 *cp;
1330          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1331          switch(code[2])          switch(code[2])
1332            {            {
# Line 1216  for (;;) Line 1340  for (;;)
1340            break;            break;
1341
1342            case PT_GC:            case PT_GC:
1343            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1344            break;            break;
1345
1346            case PT_PC:            case PT_PC:
# Line 1230  for (;;) Line 1354  for (;;)
1354            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1355
1356            case PT_ALNUM:            case PT_ALNUM:
1357            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1358                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1359            break;            break;
1360
1361            case PT_SPACE:    /* Perl space */            /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1362            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            which means that Perl space and POSIX space are now identical. PCRE
1363                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;            was changed at release 8.34. */
break;
1364
1365              case PT_SPACE:    /* Perl space */
1366            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1367            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            switch(c)
1368                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||              {
1369                 c == CHAR_FF || c == CHAR_CR;              HSPACE_CASES:
1370                VSPACE_CASES:
1371                OK = TRUE;
1372                break;
1373
1374                default:
1375                OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1376                break;
1377                }
1378            break;            break;
1379
1380            case PT_WORD:            case PT_WORD:
1381            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1382                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1383                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1384            break;            break;
1385
1386              case PT_CLIST:
1387              cp = PRIV(ucd_caseless_sets) + code[3];
1388              for (;;)
1389                {
1390                if (c < *cp) { OK = FALSE; break; }
1391                if (c == *cp++) { OK = TRUE; break; }
1392                }
1393              break;
1394
1395              case PT_UCNC:
1396              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1397                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1398                   c >= 0xe000;
1399              break;
1400
1401            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1402
1403            default:            default:
# Line 1277  for (;;) Line 1424  for (;;)
1424        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1425        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1426        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1427        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1428          {          {
1429          const uschar *nptr = ptr + clen;          int lgb, rgb;
1430            const pcre_uchar *nptr = ptr + clen;
1431          int ncount = 0;          int ncount = 0;
1432          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1433            {            {
1434            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1435            next_active_state--;            next_active_state--;
1436            }            }
1437            lgb = UCD_GRAPHBREAK(c);
1438          while (nptr < end_subject)          while (nptr < end_subject)
1439            {            {
1440            int nd;            dlen = 1;
1441            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1442            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1443            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1444            ncount++;            ncount++;
1445            nptr += ndlen;            lgb = rgb;
1446              nptr += dlen;
1447            }            }
1448          count++;          count++;
# Line 1312  for (;;) Line 1462  for (;;)
1462          int ncount = 0;          int ncount = 0;
1463          switch (c)          switch (c)
1464            {            {
1465            case 0x000b:            case CHAR_VT:
1466            case 0x000c:            case CHAR_FF:
1467            case 0x0085:            case CHAR_NEL:
1468    #ifndef EBCDIC
1469            case 0x2028:            case 0x2028:
1470            case 0x2029:            case 0x2029:
1471    #endif  /* Not EBCDIC */
1472            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1473            goto ANYNL01;            goto ANYNL01;
1474
1475            case 0x000d:            case CHAR_CR:
1476            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
1477            /* Fall through */            /* Fall through */
1478
1479            ANYNL01:            ANYNL01:
1480            case 0x000a:            case CHAR_LF:
1481            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1482              {              {
1483              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1352  for (;;) Line 1504  for (;;)
1504          BOOL OK;          BOOL OK;
1505          switch (c)          switch (c)
1506            {            {
1507            case 0x000a:            VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
1508            OK = TRUE;            OK = TRUE;
1509            break;            break;
1510
# Line 1391  for (;;) Line 1537  for (;;)
1537          BOOL OK;          BOOL OK;
1538          switch (c)          switch (c)
1539            {            {
1540            case 0x09:      /* HT */            HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
1541            OK = TRUE;            OK = TRUE;
1542            break;            break;
1543
# Line 1450  for (;;) Line 1578  for (;;)
1578        if (clen > 0)        if (clen > 0)
1579          {          {
1580          BOOL OK;          BOOL OK;
1581            const pcre_uint32 *cp;
1582          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1583          switch(code[2])          switch(code[2])
1584            {            {
# Line 1463  for (;;) Line 1592  for (;;)
1592            break;            break;
1593
1594            case PT_GC:            case PT_GC:
1595            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1596            break;            break;
1597
1598            case PT_PC:            case PT_PC:
# Line 1477  for (;;) Line 1606  for (;;)
1606            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1607
1608            case PT_ALNUM:            case PT_ALNUM:
1609            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1610                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1611            break;            break;
1612
1613            case PT_SPACE:    /* Perl space */            /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1614            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            which means that Perl space and POSIX space are now identical. PCRE
1615                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;            was changed at release 8.34. */
break;
1616
1617              case PT_SPACE:    /* Perl space */
1618            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1619            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            switch(c)
1620                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||              {
1621                 c == CHAR_FF || c == CHAR_CR;              HSPACE_CASES:
1622                VSPACE_CASES:
1623                OK = TRUE;
1624                break;
1625
1626                default:
1627                OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1628                break;
1629                }
1630            break;            break;
1631
1632            case PT_WORD:            case PT_WORD:
1633            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1634                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1635                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1636            break;            break;
1637
1638              case PT_CLIST:
1639              cp = PRIV(ucd_caseless_sets) + code[3];
1640              for (;;)
1641                {
1642                if (c < *cp) { OK = FALSE; break; }
1643                if (c == *cp++) { OK = TRUE; break; }
1644                }
1645              break;
1646
1647              case PT_UCNC:
1648              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1649                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1650                   c >= 0xe000;
1651              break;
1652
1653            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1654
1655            default:            default:
# Line 1533  for (;;) Line 1685  for (;;)
1685        QS2:        QS2:
1686
1688        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1689          {          {
1690          const uschar *nptr = ptr + clen;          int lgb, rgb;
1691            const pcre_uchar *nptr = ptr + clen;
1692          int ncount = 0;          int ncount = 0;
1693          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1694              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1543  for (;;) Line 1696  for (;;)
1696            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1697            next_active_state--;            next_active_state--;
1698            }            }
1699            lgb = UCD_GRAPHBREAK(c);
1700          while (nptr < end_subject)          while (nptr < end_subject)
1701            {            {
1702            int nd;            dlen = 1;
1703            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1704            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1705            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1706            ncount++;            ncount++;
1707            nptr += ndlen;            lgb = rgb;
1708              nptr += dlen;
1709            }            }
1711          }          }
# Line 1576  for (;;) Line 1731  for (;;)
1731          int ncount = 0;          int ncount = 0;
1732          switch (c)          switch (c)
1733            {            {
1734            case 0x000b:            case CHAR_VT:
1735            case 0x000c:            case CHAR_FF:
1736            case 0x0085:            case CHAR_NEL:
1737    #ifndef EBCDIC
1738            case 0x2028:            case 0x2028:
1739            case 0x2029:            case 0x2029:
1740    #endif  /* Not EBCDIC */
1741            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1742            goto ANYNL02;            goto ANYNL02;
1743
1744            case 0x000d:            case CHAR_CR:
1745            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
1746            /* Fall through */            /* Fall through */
1747
1748            ANYNL02:            ANYNL02:
1749            case 0x000a:            case CHAR_LF:
1750            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1751                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1752              {              {
1753              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1754              next_active_state--;              next_active_state--;
1755              }              }
1757            break;            break;
1758
1759            default:            default:
# Line 1624  for (;;) Line 1781  for (;;)
1781          BOOL OK;          BOOL OK;
1782          switch (c)          switch (c)
1783            {            {
1784            case 0x000a:            VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
1785            OK = TRUE;            OK = TRUE;
1786            break;            break;
1787
# Line 1646  for (;;) Line 1797  for (;;)
1797              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1798              next_active_state--;              next_active_state--;
1799              }              }
1801            }            }
1802          }          }
1803        break;        break;
# Line 1670  for (;;) Line 1821  for (;;)
1821          BOOL OK;          BOOL OK;
1822          switch (c)          switch (c)
1823            {            {
1824            case 0x09:      /* HT */            HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
1825            OK = TRUE;            OK = TRUE;
1826            break;            break;
1827
# Line 1705  for (;;) Line 1838  for (;;)
1838              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1839              next_active_state--;              next_active_state--;
1840              }              }
1842            }            }
1843          }          }
1844        break;        break;
# Line 1717  for (;;) Line 1850  for (;;)
1850        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1851        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1852        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1853          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1854        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1855        if (clen > 0)        if (clen > 0)
1856          {          {
1857          BOOL OK;          BOOL OK;
1858            const pcre_uint32 *cp;
1859          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1860          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1861            {            {
1862            case PT_ANY:            case PT_ANY:
1863            OK = TRUE;            OK = TRUE;
# Line 1735  for (;;) Line 1869  for (;;)
1869            break;            break;
1870
1871            case PT_GC:            case PT_GC:
1872            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1873            break;            break;
1874
1875            case PT_PC:            case PT_PC:
1876            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1877            break;            break;
1878
1879            case PT_SC:            case PT_SC:
1880            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1881            break;            break;
1882
1883            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1884
1885            case PT_ALNUM:            case PT_ALNUM:
1886            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1887                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1888            break;            break;
1889
1890            case PT_SPACE:    /* Perl space */            /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1891            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            which means that Perl space and POSIX space are now identical. PCRE
1892                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;            was changed at release 8.34. */
break;
1893
1894              case PT_SPACE:    /* Perl space */
1895            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1896            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            switch(c)
1897                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||              {
1898                 c == CHAR_FF || c == CHAR_CR;              HSPACE_CASES:
1899                VSPACE_CASES:
1900                OK = TRUE;
1901                break;
1902
1903                default:
1904                OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1905                break;
1906                }
1907            break;            break;
1908
1909            case PT_WORD:            case PT_WORD:
1910            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1911                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1912                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1913            break;            break;
1914
1915              case PT_CLIST:
1916              cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
1917              for (;;)
1918                {
1919                if (c < *cp) { OK = FALSE; break; }
1920                if (c == *cp++) { OK = TRUE; break; }
1921                }
1922              break;
1923
1924              case PT_UCNC:
1925              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1926                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1927                   c >= 0xe000;
1928              break;
1929
1930            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1931
1932            default:            default:
# Line 1784  for (;;) Line 1941  for (;;)
1941              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1942              next_active_state--;              next_active_state--;
1943              }              }
1944            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1945              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1946            else            else
1948            }            }
# Line 1798  for (;;) Line 1955  for (;;)
1955        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1956        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1957        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1958          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1959        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1960        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1961          {          {
1962          const uschar *nptr = ptr + clen;          int lgb, rgb;
1963            const pcre_uchar *nptr = ptr + clen;
1964          int ncount = 0;          int ncount = 0;
1965          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1966            {            {
1967            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1968            next_active_state--;            next_active_state--;
1969            }            }
1970            lgb = UCD_GRAPHBREAK(c);
1971          while (nptr < end_subject)          while (nptr < end_subject)
1972            {            {
1973            int nd;            dlen = 1;
1974            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1975            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1976            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1977            ncount++;            ncount++;
1978            nptr += ndlen;            lgb = rgb;
1979              nptr += dlen;
1980            }            }
1981          if (++count >= GET2(code, 1))          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1982            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              reset_could_continue = TRUE;
1983            if (++count >= (int)GET2(code, 1))
1984              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1985          else          else
1987          }          }
# Line 1832  for (;;) Line 1994  for (;;)
1994        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1995        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1996        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1997          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1998        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1999        if (clen > 0)        if (clen > 0)
2000          {          {
2001          int ncount = 0;          int ncount = 0;
2002          switch (c)          switch (c)
2003            {            {
2004            case 0x000b:            case CHAR_VT:
2005            case 0x000c:            case CHAR_FF:
2006            case 0x0085:            case CHAR_NEL:
2007    #ifndef EBCDIC
2008            case 0x2028:            case 0x2028:
2009            case 0x2029:            case 0x2029:
2010    #endif  /* Not EBCDIC */
2011            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2012            goto ANYNL03;            goto ANYNL03;
2013
2014            case 0x000d:            case CHAR_CR:
2015            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
2016            /* Fall through */            /* Fall through */
2017
2018            ANYNL03:            ANYNL03:
2019            case 0x000a:            case CHAR_LF:
2020            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
2021              {              {
2022              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2023              next_active_state--;              next_active_state--;
2024              }              }
2025            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2026              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
2027            else            else
2029            break;            break;
# Line 1876  for (;;) Line 2040  for (;;)
2040        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
2041        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
2042        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
2043          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2044        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2045        if (clen > 0)        if (clen > 0)
2046          {          {
2047          BOOL OK;          BOOL OK;
2048          switch (c)          switch (c)
2049            {            {
2050            case 0x000a:            VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
2051            OK = TRUE;            OK = TRUE;
2052            break;            break;
2053
# Line 1904  for (;;) Line 2062  for (;;)
2062              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2063              next_active_state--;              next_active_state--;
2064              }              }
2065            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2066              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2067            else            else
2069            }            }
# Line 1918  for (;;) Line 2076  for (;;)
2076        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2077        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2078        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2079          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2080        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2081        if (clen > 0)        if (clen > 0)
2082          {          {
2083          BOOL OK;          BOOL OK;
2084          switch (c)          switch (c)
2085            {            {
2086            case 0x09:      /* HT */            HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
2087            OK = TRUE;            OK = TRUE;
2088            break;            break;
2089
# Line 1959  for (;;) Line 2099  for (;;)
2099              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2100              next_active_state--;              next_active_state--;
2101              }              }
2102            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2103              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2104            else            else
2106            }            }
# Line 1982  for (;;) Line 2122  for (;;)
2122        case OP_CHARI:        case OP_CHARI:
2123        if (clen == 0) break;        if (clen == 0) break;
2124
2125  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2126        if (utf8)        if (utf)
2127          {          {
2128          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2129            {            {
2130            unsigned int othercase;            unsigned int othercase;
2131            if (c < 128) othercase = fcc[c]; else            if (c < 128)
2132                othercase = fcc[c];
2133            /* If we have Unicode property support, we can use it to test the            else
2134            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2135                other case of the character. */
2136  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2137            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2138  #else  #else
2139            othercase = NOTACHAR;              othercase = NOTACHAR;
2140  #endif  #endif
2141
2142            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2143            }            }
2144          }          }
2145        else        else
2146  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2147          /* Not UTF mode */
/* Non-UTF-8 mode */
2148          {          {
2149          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2150              { ADD_NEW(state_offset + 2, 0); }
2151          }          }
2152        break;        break;
2153
# Line 2019  for (;;) Line 2159  for (;;)
2159        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2160
2161        case OP_EXTUNI:        case OP_EXTUNI:
2162        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2163          {          {
2164          const uschar *nptr = ptr + clen;          int lgb, rgb;
2165            const pcre_uchar *nptr = ptr + clen;
2166          int ncount = 0;          int ncount = 0;
2167            lgb = UCD_GRAPHBREAK(c);
2168          while (nptr < end_subject)          while (nptr < end_subject)
2169            {            {
2170            int nclen = 1;            dlen = 1;
2171            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2172            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2173              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2174            ncount++;            ncount++;
2175            nptr += nclen;            lgb = rgb;
2176              nptr += dlen;
2177            }            }
2178            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2179                reset_could_continue = TRUE;
2181          }          }
2182        break;        break;
# Line 2044  for (;;) Line 2190  for (;;)
2190        case OP_ANYNL:        case OP_ANYNL:
2191        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2192          {          {
2193          case 0x000b:          case CHAR_VT:
2194          case 0x000c:          case CHAR_FF:
2195          case 0x0085:          case CHAR_NEL:
2196    #ifndef EBCDIC
2197          case 0x2028:          case 0x2028:
2198          case 0x2029:          case 0x2029:
2199    #endif  /* Not EBCDIC */
2200          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2201
2202          case 0x000a:          case CHAR_LF:
2204          break;          break;
2205
2206          case 0x000d:          case CHAR_CR:
2207          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2208              {
2210              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2211                reset_could_continue = TRUE;
2212              }
2213            else if (ptr[1] == CHAR_LF)
2214            {            {
2216            }            }
# Line 2072  for (;;) Line 2226  for (;;)
2226        case OP_NOT_VSPACE:        case OP_NOT_VSPACE:
2227        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2228          {          {
2229          case 0x000a:          VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
2230          break;          break;
2231
2232          default:          default:
# Line 2091  for (;;) Line 2239  for (;;)
2239        case OP_VSPACE:        case OP_VSPACE:
2240        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2241          {          {
2242          case 0x000a:          VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
2244          break;          break;
2245
2246          default: break;          default:
2247            break;
2248          }          }
2249        break;        break;
2250
# Line 2109  for (;;) Line 2252  for (;;)
2252        case OP_NOT_HSPACE:        case OP_NOT_HSPACE:
2253        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2254          {          {
2255          case 0x09:      /* HT */          HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
2256          break;          break;
2257
2258          default:          default:
# Line 2140  for (;;) Line 2265  for (;;)
2265        case OP_HSPACE:        case OP_HSPACE:
2266        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2267          {          {
2268          case 0x09:      /* HT */          HSPACE_CASES:
case 0x20:      /* SPACE */
case 0xa0:      /* NBSP */
case 0x1680:    /* OGHAM SPACE MARK */
case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000:    /* EN QUAD */
case 0x2001:    /* EM QUAD */
case 0x2002:    /* EN SPACE */
case 0x2003:    /* EM SPACE */
case 0x2004:    /* THREE-PER-EM SPACE */
case 0x2005:    /* FOUR-PER-EM SPACE */
case 0x2006:    /* SIX-PER-EM SPACE */
case 0x2007:    /* FIGURE SPACE */
case 0x2008:    /* PUNCTUATION SPACE */
case 0x2009:    /* THIN SPACE */
case 0x200A:    /* HAIR SPACE */
case 0x202f:    /* NARROW NO-BREAK SPACE */
case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
case 0x3000:    /* IDEOGRAPHIC SPACE */
2270          break;          break;
2271
2272            default:
2273            break;
2274          }          }
2275        break;        break;
2276
2277        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2278        /* Match a negated single character casefully. This is only used for        /* Match a negated single character casefully. */
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */
2279
2280        case OP_NOT:        case OP_NOT:
2281        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2282        break;        break;
2283
2284        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2285        /* Match a negated single character caselessly. This is only used for        /* Match a negated single character caselessly. */
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */
2286
2287        case OP_NOTI:        case OP_NOTI:
2288        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0)
2289          { ADD_NEW(state_offset + dlen + 1, 0); }          {
2290            unsigned int otherd;
2291    #ifdef SUPPORT_UTF
2292            if (utf && d >= 128)
2293              {
2294    #ifdef SUPPORT_UCP
2295              otherd = UCD_OTHERCASE(d);
2296    #endif  /* SUPPORT_UCP */
2297              }
2298            else
2299    #endif  /* SUPPORT_UTF */
2300            otherd = TABLE_GET(d, fcc, d);
2301            if (c != d && c != otherd)
2302              { ADD_NEW(state_offset + dlen + 1, 0); }
2303            }
2304        break;        break;
2305
2306        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 2192  for (;;) Line 2312  for (;;)
2312        case OP_NOTPOSPLUSI:        case OP_NOTPOSPLUSI:
2313        caseless = TRUE;        caseless = TRUE;
2314        codevalue -= OP_STARI - OP_STAR;        codevalue -= OP_STARI - OP_STAR;
2315
2316        /* Fall through */        /* Fall through */
2317        case OP_PLUS:        case OP_PLUS:
2318        case OP_MINPLUS:        case OP_MINPLUS:
# Line 2204  for (;;) Line 2324  for (;;)
2324        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2325        if (clen > 0)        if (clen > 0)
2326          {          {
2327          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2328          if (caseless)          if (caseless)
2329            {            {
2330  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2331            if (utf8 && d >= 128)            if (utf && d >= 128)
2332              {              {
2333  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2334              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2335  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2336              }              }
2337            else            else
2338  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2339            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2340            }            }
2341          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2342            {            {
# Line 2251  for (;;) Line 2371  for (;;)
2371        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2372        if (clen > 0)        if (clen > 0)
2373          {          {
2374          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2375          if (caseless)          if (caseless)
2376            {            {
2377  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2378            if (utf8 && d >= 128)            if (utf && d >= 128)
2379              {              {
2380  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2381              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2382  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2383              }              }
2384            else            else
2385  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2386            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2387            }            }
2388          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2389            {            {
# Line 2296  for (;;) Line 2416  for (;;)
2416        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2417        if (clen > 0)        if (clen > 0)
2418          {          {
2419          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2420          if (caseless)          if (caseless)
2421            {            {
2422  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2423            if (utf8 && d >= 128)            if (utf && d >= 128)
2424              {              {
2425  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2426              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2427  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2428              }              }
2429            else            else
2430  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2431            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2432            }            }
2433          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2434            {            {
# Line 2333  for (;;) Line 2453  for (;;)
2453        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2454        if (clen > 0)        if (clen > 0)
2455          {          {
2456          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2457          if (caseless)          if (caseless)
2458            {            {
2459  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2460            if (utf8 && d >= 128)            if (utf && d >= 128)
2461              {              {
2462  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2463              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2464  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2465              }              }
2466            else            else
2467  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2468            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2469            }            }
2470          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2471            {            {
2472            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2473              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2474            else            else
2476            }            }
# Line 2373  for (;;) Line 2493  for (;;)
2493        case OP_NOTUPTO:        case OP_NOTUPTO:
2494        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2495        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2496        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2497        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2498        if (clen > 0)        if (clen > 0)
2499          {          {
2500          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2501          if (caseless)          if (caseless)
2502            {            {
2503  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2504            if (utf8 && d >= 128)            if (utf && d >= 128)
2505              {              {
2506  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2507              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2508  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2509              }              }
2510            else            else
2511  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2512            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2513            }            }
2514          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2515            {            {
# Line 2398  for (;;) Line 2518  for (;;)
2518              active_count--;             /* Remove non-match possibility */              active_count--;             /* Remove non-match possibility */
2519              next_active_state--;              next_active_state--;
2520              }              }
2521            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2522              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2523            else            else
2525            }            }
# Line 2416  for (;;) Line 2536  for (;;)
2536          {          {
2537          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2538          int next_state_offset;          int next_state_offset;
2539          const uschar *ecode;          const pcre_uchar *ecode;
2540
2541          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2542          can set isinclass from it. */          can set isinclass from it. */
2543
2544          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2545            {            {
2546            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2547            if (clen > 0)            if (clen > 0)
2548              {              {
2549              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2550                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2551              }              }
2552            }            }
2553
# Line 2438  for (;;) Line 2558  for (;;)
2558          else          else
2559           {           {
2560           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2561           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2562           }           }
2563
2564          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
# Line 2451  for (;;) Line 2571  for (;;)
2571            {            {
2572            case OP_CRSTAR:            case OP_CRSTAR:
2573            case OP_CRMINSTAR:            case OP_CRMINSTAR:
2574              case OP_CRPOSSTAR:
2576            if (isinclass) { ADD_NEW(state_offset, 0); }            if (isinclass)
2577                {
2578                if (*ecode == OP_CRPOSSTAR)
2579                  {
2580                  active_count--;           /* Remove non-match possibility */
2581                  next_active_state--;
2582                  }
2584                }
2585            break;            break;
2586
2587            case OP_CRPLUS:            case OP_CRPLUS:
2588            case OP_CRMINPLUS:            case OP_CRMINPLUS:
2589              case OP_CRPOSPLUS:
2590            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2591            if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }            if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
2592            if (isinclass) { count++; ADD_NEW(state_offset, count); }            if (isinclass)
2593                {
2594                if (count > 0 && *ecode == OP_CRPOSPLUS)
2595                  {
2596                  active_count--;           /* Remove non-match possibility */
2597                  next_active_state--;
2598                  }
2599                count++;
2601                }
2602            break;            break;
2603
2604            case OP_CRQUERY:            case OP_CRQUERY:
2605            case OP_CRMINQUERY:            case OP_CRMINQUERY:
2606              case OP_CRPOSQUERY:
2608            if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }            if (isinclass)
2609                {
2610                if (*ecode == OP_CRPOSQUERY)
2611                  {
2612                  active_count--;           /* Remove non-match possibility */
2613                  next_active_state--;
2614                  }
2616                }
2617            break;            break;
2618
2619            case OP_CRRANGE:            case OP_CRRANGE:
2620            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2621              case OP_CRPOSRANGE:
2622            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2623            if (count >= GET2(ecode, 1))            if (count >= (int)GET2(ecode, 1))
2624              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2625            if (isinclass)            if (isinclass)
2626              {              {
2627              int max = GET2(ecode, 3);              int max = (int)GET2(ecode, 1 + IMM2_SIZE);
2628                if (*ecode == OP_CRPOSRANGE)
2629                  {
2630                  active_count--;           /* Remove non-match possibility */
2631                  next_active_state--;
2632                  }
2633              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2634                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2635              else              else
2637              }              }
# Line 2508  for (;;) Line 2662  for (;;)
2662          int rc;          int rc;
2663          int local_offsets[2];          int local_offsets[2];
2664          int local_workspace[1000];          int local_workspace[1000];
2665          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2666
2667          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2668
# Line 2545  for (;;) Line 2699  for (;;)
2700            {            {
2701            rrc = 0;            rrc = 0;
2702            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2703              {              {
2704              pcre_callout_block cb;              PUBL(callout_block) cb;
2705              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2707              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2708    #if defined COMPILE_PCRE8
2709              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2710    #elif defined COMPILE_PCRE16
2711                cb.subject          = (PCRE_SPTR16)start_subject;
2712    #elif defined COMPILE_PCRE32
2713                cb.subject          = (PCRE_SPTR32)start_subject;
2714    #endif
2715              cb.subject_length   = (int)(end_subject - start_subject);              cb.subject_length   = (int)(end_subject - start_subject);
2716              cb.start_match      = (int)(current_subject - start_subject);              cb.start_match      = (int)(current_subject - start_subject);
2717              cb.current_position = (int)(ptr - start_subject);              cb.current_position = (int)(ptr - start_subject);
# Line 2560  for (;;) Line 2720  for (;;)
2720              cb.capture_top      = 1;              cb.capture_top      = 1;
2721              cb.capture_last     = -1;              cb.capture_last     = -1;
2722              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2723              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              cb.mark             = NULL;   /* No (*MARK) support */
2724                if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2725              }              }
2726            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2727            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2728            }            }
2729
2731
2732          /* Back reference conditions are not supported */          /* Back reference conditions and duplicate named recursion conditions
2733            are not supported */
2734
2735          if (condcode == OP_CREF || condcode == OP_NCREF)          if (condcode == OP_CREF || condcode == OP_DNCREF ||
2736                condcode == OP_DNRREF)
2737            return PCRE_ERROR_DFA_UCOND;            return PCRE_ERROR_DFA_UCOND;
2738
2739          /* The DEFINE condition is always false */          /* The DEFINE condition is always false */
# Line 2582  for (;;) Line 2745  for (;;)
2745          which means "test if in any recursion". We can't test for specifically          which means "test if in any recursion". We can't test for specifically
2746          recursed groups. */          recursed groups. */
2747
2748          else if (condcode == OP_RREF || condcode == OP_NRREF)          else if (condcode == OP_RREF)
2749            {            {
2750            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2751            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2752            if (md->recursive != NULL)            if (md->recursive != NULL)
2755            }            }
2756
# Line 2596  for (;;) Line 2759  for (;;)
2759          else          else
2760            {            {
2761            int rc;            int rc;
2762            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2763            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2764
2765            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2766
# Line 2625  for (;;) Line 2788  for (;;)
2788        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2789        case OP_RECURSE:        case OP_RECURSE:
2790          {          {
2791          dfa_recursion_info *ri;          dfa_recursion_info *ri;
2792          int local_offsets[1000];          int local_offsets[1000];
2793          int local_workspace[1000];          int local_workspace[1000];
2794          const uschar *callpat = start_code + GET(code, 1);          const pcre_uchar *callpat = start_code + GET(code, 1);
2795          int recno = (callpat == md->start_code)? 0 :          int recno = (callpat == md->start_code)? 0 :
2797          int rc;          int rc;
2798
2799          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2800
2801          /* Check for repeating a recursion without advancing the subject          /* Check for repeating a recursion without advancing the subject
2802          pointer. This should catch convoluted mutual recursions. (Some simple          pointer. This should catch convoluted mutual recursions. (Some simple
2803          cases are caught at compile time.) */          cases are caught at compile time.) */

for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
if (recno == ri->group_num && ptr == ri->subject_position)
return PCRE_ERROR_RECURSELOOP;
2804
2805          /* Remember this recursion and where we started it so as to          for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2806              if (recno == ri->group_num && ptr == ri->subject_position)
2807                return PCRE_ERROR_RECURSELOOP;
2808
2809            /* Remember this recursion and where we started it so as to
2810          catch infinite loops. */          catch infinite loops. */
2811
2812          new_recursive.group_num = recno;          new_recursive.group_num = recno;
2813          new_recursive.subject_position = ptr;          new_recursive.subject_position = ptr;
2814          new_recursive.prevrec = md->recursive;          new_recursive.prevrec = md->recursive;
2815          md->recursive = &new_recursive;          md->recursive = &new_recursive;
2816
2817          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2818            md,                                   /* fixed match data */            md,                                   /* fixed match data */
# Line 2664  for (;;) Line 2827  for (;;)
2827
2828          md->recursive = new_recursive.prevrec;  /* Done this recursion */          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2829
2830          DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,          DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2831            rc));            rc));
2832
2833          /* Ran out of internal offsets */          /* Ran out of internal offsets */
# Line 2679  for (;;) Line 2842  for (;;)
2842            {            {
2843            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2844              {              {
const uschar *p = start_subject + local_offsets[rc];
const uschar *pp = start_subject + local_offsets[rc+1];
2845              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2846              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2847                if (utf)
2848                  {
2849                  const pcre_uchar *p = start_subject + local_offsets[rc];
2850                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2851                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2852                  }
2853    #endif
2854              if (charcount > 0)              if (charcount > 0)
2855                {                {
# Line 2702  for (;;) Line 2870  for (;;)
2870        case OP_SBRAPOS:        case OP_SBRAPOS:
2871        case OP_CBRAPOS:        case OP_CBRAPOS:
2872        case OP_SCBRAPOS:        case OP_SCBRAPOS:
2873        case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
2874          {          {
2875          int charcount, matched_count;          int charcount, matched_count;
2876          const uschar *local_ptr = ptr;          const pcre_uchar *local_ptr = ptr;
2877          BOOL allow_zero;          BOOL allow_zero;
2878
2879          if (codevalue == OP_BRAPOSZERO)          if (codevalue == OP_BRAPOSZERO)
2880            {            {
2881            allow_zero = TRUE;            allow_zero = TRUE;
2882            codevalue = *(++code);  /* Codevalue will be one of above BRAs */            codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2883            }            }
2884          else allow_zero = FALSE;          else allow_zero = FALSE;
2885
2886          /* Loop to match the subpattern as many times as possible as if it were          /* Loop to match the subpattern as many times as possible as if it were
2887          a complete pattern. */          a complete pattern. */
2888
2889          for (matched_count = 0;; matched_count++)          for (matched_count = 0;; matched_count++)
2890            {            {
2891            int local_offsets[2];            int local_offsets[2];
2892            int local_workspace[1000];            int local_workspace[1000];
2893
2894            int rc = internal_dfa_exec(            int rc = internal_dfa_exec(
2895              md,                                   /* fixed match data */              md,                                   /* fixed match data */
2896              code,                                 /* this subexpression's code */              code,                                 /* this subexpression's code */
# Line 2733  for (;;) Line 2901  for (;;)
2901              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2902              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2903              rlevel);                              /* function recursion level */              rlevel);                              /* function recursion level */
2904
2905            /* Failed to match */            /* Failed to match */
2906
2907            if (rc < 0)            if (rc < 0)
2908              {              {
2909              if (rc != PCRE_ERROR_NOMATCH) return rc;              if (rc != PCRE_ERROR_NOMATCH) return rc;
2910              break;              break;
2911              }              }
2912
2913            /* Matched: break the loop if zero characters matched. */            /* Matched: break the loop if zero characters matched. */
2914
2915            charcount = local_offsets[1] - local_offsets[0];            charcount = local_offsets[1] - local_offsets[0];
2916            if (charcount == 0) break;            if (charcount == 0) break;
2917            local_ptr += charcount;    /* Advance temporary position ptr */            local_ptr += charcount;    /* Advance temporary position ptr */
2918            }            }
2919
2920          /* At this point we have matched the subpattern matched_count          /* At this point we have matched the subpattern matched_count
2921          times, and local_ptr is pointing to the character after the end of the          times, and local_ptr is pointing to the character after the end of the
2922          last match. */          last match. */
2923
2924          if (matched_count > 0 || allow_zero)          if (matched_count > 0 || allow_zero)
2925            {            {
2926            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2927            int next_state_offset;            int next_state_offset;
2928
2929            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2930              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
2931            next_state_offset =            next_state_offset =
# Line 2776  for (;;) Line 2944  for (;;)
2944              }              }
2945            else            else
2946              {              {
2947              const uschar *p = ptr;              const pcre_uchar *p = ptr;
2948              const uschar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2949              charcount = pp - p;              charcount = (int)(pp - p);
2950              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2951                if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2952    #endif
2954              }              }
2955            }            }
2956          }          }
2957        break;        break;
2958
2959        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2960        case OP_ONCE:        case OP_ONCE:
2961          case OP_ONCE_NC:
2962          {          {
2963          int local_offsets[2];          int local_offsets[2];
2964          int local_workspace[1000];          int local_workspace[1000];
# Line 2805  for (;;) Line 2976  for (;;)
2976
2977          if (rc >= 0)          if (rc >= 0)
2978            {            {
2979            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2980            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2981            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2982
# Line 2858  for (;;) Line 3029  for (;;)
3029              }              }
3030            else            else
3031              {              {
3032              const uschar *p = start_subject + local_offsets[0];  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3033              const uschar *pp = start_subject + local_offsets[1];              if (utf)
3034              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;                {
3035                  const pcre_uchar *p = start_subject + local_offsets[0];
3036                  const pcre_uchar *pp = start_subject + local_offsets[1];
3037                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
3038                  }
3039    #endif
3041              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
3042                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
# Line 2876  for (;;) Line 3052  for (;;)
3052
3053        case OP_CALLOUT:        case OP_CALLOUT:
3054        rrc = 0;        rrc = 0;
3055        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
3056          {          {
3057          pcre_callout_block cb;          PUBL(callout_block) cb;
3058          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
3059          cb.callout_number   = code[1];          cb.callout_number   = code[1];
3060          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
3061    #if defined COMPILE_PCRE8
3062          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
3063    #elif defined COMPILE_PCRE16
3064            cb.subject          = (PCRE_SPTR16)start_subject;
3065    #elif defined COMPILE_PCRE32
3066            cb.subject          = (PCRE_SPTR32)start_subject;
3067    #endif
3068          cb.subject_length   = (int)(end_subject - start_subject);          cb.subject_length   = (int)(end_subject - start_subject);
3069          cb.start_match      = (int)(current_subject - start_subject);          cb.start_match      = (int)(current_subject - start_subject);
3070          cb.current_position = (int)(ptr - start_subject);          cb.current_position = (int)(ptr - start_subject);
# Line 2891  for (;;) Line 3073  for (;;)
3073          cb.capture_top      = 1;          cb.capture_top      = 1;
3074          cb.capture_last     = -1;          cb.capture_last     = -1;
3075          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
3076          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          cb.mark             = NULL;   /* No (*MARK) support */
3077            if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
3078          }          }
3079        if (rrc == 0)        if (rrc == 0)
3080          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3081        break;        break;
3082
3083
# Line 2923  for (;;) Line 3106  for (;;)
3106    if (new_count <= 0)    if (new_count <= 0)
3107      {      {
3108      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3109          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3110          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3111          (                                            /* either... */          (                                            /* either... */
3112          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2931  for (;;) Line 3114  for (;;)
3114          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3115           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3116          ) &&                                         /* And... */          ) &&                                         /* And... */
3117          ptr >= end_subject &&                  /* Reached end of subject */          (
3118          ptr > md->start_used_ptr)              /* Inspected non-empty string */          partial_newline ||                           /* Either partial NL */
3119        {            (                                          /* or ... */
3120        if (offsetcount >= 2)            ptr >= end_subject &&                /* End of subject and */
3121          {            ptr > md->start_used_ptr)            /* Inspected non-empty string */
3122          offsets[0] = (int)(md->start_used_ptr - start_subject);            )
3123          offsets[1] = (int)(end_subject - start_subject);          )
}
3124        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
}

3125      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3126        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
3127        rlevel*2-2, SP));        rlevel*2-2, SP));
# Line 2991  Returns:          > 0 => number of match Line 3171  Returns:          > 0 => number of match
3171                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3172  */  */
3173
3174    #if defined COMPILE_PCRE8
3175  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3176  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3177    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3178    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3179    #elif defined COMPILE_PCRE16
3180    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3181    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3182      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3183      int offsetcount, int *workspace, int wscount)
3184    #elif defined COMPILE_PCRE32
3185    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3186    pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
3187      PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3188      int offsetcount, int *workspace, int wscount)
3189    #endif
3190  {  {
3191  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3192  dfa_match_data match_block;  dfa_match_data match_block;
3193  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3194  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3195  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;

pcre_study_data internal_study;
3196  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
real_pcre internal_re;
3197
3198  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3199  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3200  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3201  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3202  int first_byte = -1;  pcre_uchar first_char = 0;
3203  int req_byte = -1;  pcre_uchar first_char2 = 0;
3204  int req_byte2 = -1;  pcre_uchar req_char = 0;
3205    pcre_uchar req_char2 = 0;
3206  int newline;  int newline;
3207
3208  /* Plausibility checks */  /* Plausibility checks */
# Line 3022  if (re == NULL || subject == NULL || wor Line 3212  if (re == NULL || subject == NULL || wor
3212     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3213  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3214  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3215    if (length < 0) return PCRE_ERROR_BADLENGTH;
3216  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3217
3218  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3219  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3220  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3221  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3222
3223    if (re->magic_number != MAGIC_NUMBER)
3224      return re->magic_number == REVERSED_MAGIC_NUMBER?
3226    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3227
3228    /* If restarting after a partial match, do some sanity checks on the contents
3229    of the workspace. */
3230
3231    if ((options & PCRE_DFA_RESTART) != 0)
3232      {
3233      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3234        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3236      }
3237
3238    /* Set up study, callout, and table data */
3239
3240  md->tables = re->tables;  md->tables = re->tables;
3241  md->callout_data = NULL;  md->callout_data = NULL;
# Line 3046  if (extra_data != NULL) Line 3254  if (extra_data != NULL)
3254      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3255    }    }
3256
/* Check that the first field in the block is the magic number. If it is not,
test for a regex that was compiled on a host of opposite endianness. If this is
the case, flipped values are put in internal_re and internal_study if there was
study data too. */

if (re->magic_number != MAGIC_NUMBER)
{
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
if (re == NULL) return PCRE_ERROR_BADMAGIC;
if (study != NULL) study = &internal_study;
}

3257  /* Set some local values */  /* Set some local values */
3258
3259  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3260  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3261  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3262
3263  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3264  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
3265    utf = (re->options & PCRE_UTF8) != 0;
3266  #else  #else
3267  utf8 = FALSE;  utf = FALSE;
3268  #endif  #endif
3269
3270  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 3075  anchored = (options & (PCRE_ANCHORED|PCR Line 3272  anchored = (options & (PCRE_ANCHORED|PCR
3272
3273  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3274
3275  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3276      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3277  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3278  md->end_subject = end_subject;  md->end_subject = end_subject;
3279  md->start_offset = start_offset;  md->start_offset = start_offset;
3280  md->moptions = options;  md->moptions = options;
# Line 3138  else Line 3335  else
3335  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3336  back the character offset. */  back the character offset. */
3337
3338  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3339  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3340    {    {
3341    int erroroffset;    int erroroffset;
3342    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3343    if (errorcode != 0)    if (errorcode != 0)
3344      {      {
3345      if (offsetcount >= 2)      if (offsetcount >= 2)
3346        {        {
3347        offsets[0] = erroroffset;        offsets[0] = erroroffset;
3348        offsets[1] = errorcode;        offsets[1] = errorcode;
3349        }        }
3350      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?  #if defined COMPILE_PCRE8
3351        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
3353      }  #elif defined COMPILE_PCRE16
3354        return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
3356    #elif defined COMPILE_PCRE32
3358    #endif
3359        }
3360    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3361    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3362          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3364    #endif
3365    }    }
3366  #endif  #endif
3367
# Line 3163  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3369  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3369  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3370  in other programs later. */  in other programs later. */
3371
3372  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3373
3374  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3375  used in a loop when finding where to start. */  where to start. */
3376
lcc = md->tables + lcc_offset;
3377  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3378  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3379
# Line 3182  if (!anchored) Line 3387  if (!anchored)
3387    {    {
3388    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3389      {      {
3390      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3391      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
3392        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3393          {
3394          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3395    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3396          if (utf && first_char > 127)
3397            first_char2 = UCD_OTHERCASE(first_char);
3398    #endif
3399          }
3400      }      }
3401    else    else
3402      {      {
# Line 3199  character" set. */ Line 3411  character" set. */
3411
3412  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3413    {    {
3414    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3415    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3416    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3417        {
3418        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3419    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3420        if (utf && req_char > 127)
3421          req_char2 = UCD_OTHERCASE(req_char);
3422    #endif
3423        }
3424    }    }
3425
3426  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 3214  for (;;) Line 3433  for (;;)
3433
3434    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3435      {      {
3436      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3437
3438      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3439      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 3223  for (;;) Line 3442  for (;;)
3442
3443      if (firstline)      if (firstline)
3444        {        {
3445        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3446  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3447        if (utf8)        if (utf)
3448          {          {
3449          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3450            {            {
3451            t++;            t++;
3452            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3453            }            }
3454          }          }
3455        else        else
# Line 3247  for (;;) Line 3466  for (;;)
3466
3467      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3468        {        {
3469        /* Advance to a known first byte. */        /* Advance to a known first char. */
3470
3471        if (first_byte >= 0)        if (has_first_char)
3472          {          {
3473          if (first_byte_caseless)          if (first_char != first_char2)
3474              {
3475              pcre_uchar csc;
3476            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3477                   lcc[*current_subject] != first_byte)                   (csc = *current_subject) != first_char && csc != first_char2)
3478              current_subject++;              current_subject++;
3479              }
3480          else          else
3481            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3482                   *current_subject != first_byte)                   *current_subject != first_char)
3483              current_subject++;              current_subject++;
3484          }          }
3485
# Line 3267  for (;;) Line 3489  for (;;)
3489          {          {
3490          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3491            {            {
3492  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3493            if (utf8)            if (utf)
3494              {              {
3495              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3496                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3497                {                {
3498                current_subject++;                current_subject++;
3499                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3500                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
current_subject++;
3501                }                }
3502              }              }
3503            else            else
# Line 3290  for (;;) Line 3511  for (;;)
3511
3512            if (current_subject[-1] == CHAR_CR &&            if (current_subject[-1] == CHAR_CR &&
3513                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3514                 current_subject < end_subject &&                 current_subject < end_subject && *current_subject == CHAR_NL)
*current_subject == CHAR_NL)
3515              current_subject++;              current_subject++;
3516            }            }
3517          }          }
# Line 3302  for (;;) Line 3522  for (;;)
3522          {          {
3523          while (current_subject < end_subject)          while (current_subject < end_subject)
3524            {            {
3525            register unsigned int c = *current_subject;            register pcre_uint32 c = *current_subject;
3526    #ifndef COMPILE_PCRE8
3527              if (c > 255) c = 255;
3528    #endif
3529            if ((start_bits[c/8] & (1 << (c&7))) == 0)            if ((start_bits[c/8] & (1 << (c&7))) == 0)
3530              {              {
3531              current_subject++;              current_subject++;
3532  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3533              if (utf8)              /* In non 8-bit mode, the iteration will stop for
3534                while(current_subject < end_subject &&              characters > 255 at the beginning or not stop at all. */
3535                      (*current_subject & 0xc0) == 0x80) current_subject++;              if (utf)
3536                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3537                    current_subject++);
3538  #endif  #endif
3539              }              }
3540            else break;            else break;
# Line 3325  for (;;) Line 3550  for (;;)
3550      disabling is explicitly requested (and of course, by the test above, this      disabling is explicitly requested (and of course, by the test above, this
3551      code is not obeyed when restarting after a partial match). */      code is not obeyed when restarting after a partial match). */
3552
3553      if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3554          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3555        {        {
3556        /* If the pattern was studied, a minimum subject length may be set. This        /* If the pattern was studied, a minimum subject length may be set. This
# Line 3337  for (;;) Line 3562  for (;;)
3562            (pcre_uint32)(end_subject - current_subject) < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3563          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3564
3565        /* If req_byte is set, we know that that character must appear in the        /* If req_char is set, we know that that character must appear in the
3566        subject for the match to succeed. If the first character is set, req_byte        subject for the match to succeed. If the first character is set, req_char
3567        must be later in the subject; otherwise the test starts at the match        must be later in the subject; otherwise the test starts at the match
3568        point. This optimization can save a huge amount of work in patterns with        point. This optimization can save a huge amount of work in patterns with
3569        nested unlimited repeats that aren't going to match. Writing separate        nested unlimited repeats that aren't going to match. Writing separate
# Line 3350  for (;;) Line 3575  for (;;)
3575        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3576        string... so we don't do this when the string is sufficiently long. */        string... so we don't do this when the string is sufficiently long. */
3577
3578        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3579          {          {
3580          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3581
3582          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3583          place we found it at last time. */          place we found it at last time. */
3584
3585          if (p > req_byte_ptr)          if (p > req_char_ptr)
3586            {            {
3587            if (req_byte_caseless)            if (req_char != req_char2)
3588              {              {
3589              while (p < end_subject)              while (p < end_subject)
3590                {                {
3591                register int pp = *p++;                register pcre_uint32 pp = *p++;
3592                if (pp == req_byte || pp == req_byte2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3593                }                }
3594              }              }
3595            else            else
3596              {              {
3597              while (p < end_subject)              while (p < end_subject)
3598                {                {
3599                if (*p++ == req_byte) { p--; break; }                if (*p++ == req_char) { p--; break; }
3600                }                }
3601              }              }
3602
# Line 3384  for (;;) Line 3609  for (;;)
3609            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3610            the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3611
3612            req_byte_ptr = p;            req_char_ptr = p;
3613            }            }
3614          }          }
3615        }        }
# Line 3393  for (;;) Line 3618  for (;;)
3618    /* OK, now we can do the business */    /* OK, now we can do the business */
3619
3620    md->start_used_ptr = current_subject;    md->start_used_ptr = current_subject;
3621    md->recursive = NULL;    md->recursive = NULL;
3622
3623    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3624      md,                                /* fixed match data */      md,                                /* fixed match data */
# Line 3409  for (;;) Line 3634  for (;;)
3634    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3635    on only if not anchored. */    on only if not anchored. */
3636
3637    if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;    if (rc != PCRE_ERROR_NOMATCH || anchored)
3638        {
3639        if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
3640          {
3641          offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
3642          offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
3643          if (offsetcount > 2)
3644            offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
3645          }
3646        return rc;
3647        }
3648
3649    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
3650    and firstline is set. */    and firstline is set. */
3651
3652    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3653    current_subject++;    current_subject++;
3654    if (utf8)  #ifdef SUPPORT_UTF
3655      if (utf)
3656      {      {
3657      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3658        current_subject++;        current_subject++);
3659      }      }
3660    #endif
3661    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3662
3663    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does

Legend:
 Removed from v.642 changed lines Added in v.1425