/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 728 by ph10, Mon Oct 10 16:01:03 2011 UTC revision 1364 by ph10, Sat Oct 5 15:45:11 2013 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40    
   
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45    
46    
# Line 113  small value. Non-zero values in the tabl Line 112  small value. Non-zero values in the tabl
112  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
113  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
114    
115  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
116    0,                             /* End                                    */    0,                             /* End                                    */
117    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
118    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 121  static const uschar coptable[] = { Line 120  static const uschar coptable[] = {
120    0, 0,                          /* \P, \p                                 */    0, 0,                          /* \P, \p                                 */
121    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
122    0,                             /* \X                                     */    0,                             /* \X                                     */
123    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */    0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
124    1,                             /* Char                                   */    1,                             /* Char                                   */
125    1,                             /* Chari                                  */    1,                             /* Chari                                  */
126    1,                             /* not                                    */    1,                             /* not                                    */
127    1,                             /* noti                                   */    1,                             /* noti                                   */
128    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
129    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
130    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
131    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
132      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
133    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
134    3, 3, 3,                       /* upto I, minupto I, exact I             */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
135    1, 1, 1, 3,                    /* *+I, ++I, ?+I, upto+I                  */    1+IMM2_SIZE,                   /* exact I                                */
136      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
137    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
138    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
139    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
140    1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
141      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
142    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
143    3, 3, 3,                       /* NOT upto I, minupto I, exact I         */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
144    1, 1, 1, 3,                    /* NOT *+I, ++I, ?+I, upto+I              */    1+IMM2_SIZE,                   /* NOT exact I                            */
145      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
146    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
147    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
148    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
149    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
150      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
151    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
152    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
153    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 152  static const uschar coptable[] = { Line 156  static const uschar coptable[] = {
156    0,                             /* XCLASS - variable length               */    0,                             /* XCLASS - variable length               */
157    0,                             /* REF                                    */    0,                             /* REF                                    */
158    0,                             /* REFI                                   */    0,                             /* REFI                                   */
159      0,                             /* DNREF                                  */
160      0,                             /* DNREFI                                 */
161    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
162    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
163    0,                             /* Alt                                    */    0,                             /* Alt                                    */
# Line 182  remember the fact that a character could Line 188  remember the fact that a character could
188  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
189  two tables that follow must also be modified. */  two tables that follow must also be modified. */
190    
191  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
192    0,                             /* End                                    */    0,                             /* End                                    */
193    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
194    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
# Line 190  static const uschar poptable[] = { Line 196  static const uschar poptable[] = {
196    1, 1,                          /* \P, \p                                 */    1, 1,                          /* \P, \p                                 */
197    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
198    1,                             /* \X                                     */    1,                             /* \X                                     */
199    0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */    0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
200    1,                             /* Char                                   */    1,                             /* Char                                   */
201    1,                             /* Chari                                  */    1,                             /* Chari                                  */
202    1,                             /* not                                    */    1,                             /* not                                    */
# Line 221  static const uschar poptable[] = { Line 227  static const uschar poptable[] = {
227    1,                             /* XCLASS - variable length               */    1,                             /* XCLASS - variable length               */
228    0,                             /* REF                                    */    0,                             /* REF                                    */
229    0,                             /* REFI                                   */    0,                             /* REFI                                   */
230      0,                             /* DNREF                                  */
231      0,                             /* DNREFI                                 */
232    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
233    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
234    0,                             /* Alt                                    */    0,                             /* Alt                                    */
# Line 249  static const uschar poptable[] = { Line 257  static const uschar poptable[] = {
257  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
258  and \w */  and \w */
259    
260  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
261    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
262    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
263    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 257  static const uschar toptable1[] = { Line 265  static const uschar toptable1[] = {
265    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
266  };  };
267    
268  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
269    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
270    ctype_digit, 0,    ctype_digit, 0,
271    ctype_space, 0,    ctype_space, 0,
# Line 277  typedef struct stateblock { Line 285  typedef struct stateblock {
285    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
286  } stateblock;  } stateblock;
287    
288  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
289    
290    
291  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 296  Returns:       nothing Line 304  Returns:       nothing
304  */  */
305    
306  static void  static void
307  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
308  {  {
309  int c;  pcre_uint32 c;
310  while (length-- > 0)  while (length-- > 0)
311    {    {
312    if (isprint(c = *(p++)))    if (isprint(c = *(p++)))
313      fprintf(f, "%c", c);      fprintf(f, "%c", c);
314    else    else
315      fprintf(f, "\\x%02x", c);      fprintf(f, "\\x{%02x}", c);
316    }    }
317  }  }
318  #endif  #endif
# Line 377  for the current character, one for the f Line 385  for the current character, one for the f
385      next_new_state->count  = (y); \      next_new_state->count  = (y); \
386      next_new_state->data   = (z); \      next_new_state->data   = (z); \
387      next_new_state++; \      next_new_state++; \
388      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
389          (x), (y), (z), __LINE__)); \
390      } \      } \
391    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
392    
# Line 386  for the current character, one for the f Line 395  for the current character, one for the f
395  static int  static int
396  internal_dfa_exec(  internal_dfa_exec(
397    dfa_match_data *md,    dfa_match_data *md,
398    const uschar *this_start_code,    const pcre_uchar *this_start_code,
399    const uschar *current_subject,    const pcre_uchar *current_subject,
400    int start_offset,    int start_offset,
401    int *offsets,    int *offsets,
402    int offsetcount,    int offsetcount,
# Line 398  internal_dfa_exec( Line 407  internal_dfa_exec(
407  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
408  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
409    
410  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
411  const uschar *ptr;  const pcre_uchar *ptr;
412  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
413    
414  dfa_recursion_info new_recursive;  dfa_recursion_info new_recursive;
415    
# Line 409  int active_count, new_count, match_count Line 418  int active_count, new_count, match_count
418  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
419  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
420    
421  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
422  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
423  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
424    
425  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
426  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
427  #else  #else
428  BOOL utf8 = FALSE;  BOOL utf = FALSE;
429  #endif  #endif
430    
431    BOOL reset_could_continue = FALSE;
432    
433  rlevel++;  rlevel++;
434  offsetcount &= (-2);  offsetcount &= (-2);
435    
# Line 442  new_count = 0; Line 453  new_count = 0;
453    
454  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
455    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
456      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0);      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
457        ? IMM2_SIZE:0);
458    
459  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
460  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 470  if (*first_op == OP_REVERSE) Line 482  if (*first_op == OP_REVERSE)
482    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
483    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
484    
485  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
486    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
487    
488    if (utf8)    if (utf)
489      {      {
490      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
491        {        {
492        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
493        current_subject--;        current_subject--;
494        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
              (*current_subject & 0xc0) == 0x80)  
         current_subject--;  
495        }        }
496      }      }
497    else    else
# Line 542  else Line 552  else
552      {      {
553      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
554        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
555          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
556          2:0);          ? IMM2_SIZE:0);
557      do      do
558        {        {
559        ADD_NEW((int)(end_code - start_code + length), 0);        ADD_NEW((int)(end_code - start_code + length), 0);
# Line 556  else Line 566  else
566    
567  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
568    
569  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
570    
571  /* Loop for scanning the subject */  /* Loop for scanning the subject */
572    
# Line 565  for (;;) Line 575  for (;;)
575    {    {
576    int i, j;    int i, j;
577    int clen, dlen;    int clen, dlen;
578    unsigned int c, d;    pcre_uint32 c, d;
579    int forced_fail = 0;    int forced_fail = 0;
580    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
581      BOOL could_continue = reset_could_continue;
582      reset_could_continue = FALSE;
583    
584    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
585    new state list. */    new state list. */
# Line 583  for (;;) Line 595  for (;;)
595    
596  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
597    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
598    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
599    printf("\"\n");    printf("\"\n");
600    
601    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 603  for (;;) Line 615  for (;;)
615    
616    if (ptr < end_subject)    if (ptr < end_subject)
617      {      {
618      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
619  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
620      if (utf8) { GETCHARLEN(c, ptr, clen); } else      GETCHARLENTEST(c, ptr, clen);
621  #endif  /* SUPPORT_UTF8 */  #else
622      c = *ptr;      c = *ptr;
623    #endif  /* SUPPORT_UTF */
624      }      }
625    else    else
626      {      {
# Line 624  for (;;) Line 637  for (;;)
637      {      {
638      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
639      BOOL caseless = FALSE;      BOOL caseless = FALSE;
640      const uschar *code;      const pcre_uchar *code;
641      int state_offset = current_state->offset;      int state_offset = current_state->offset;
642      int count, codevalue, rrc;      int codevalue, rrc;
643        int count;
644    
645  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
646      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 637  for (;;) Line 651  for (;;)
651    
652      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
653      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
654      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
655        state, arrange for it to passed on. */
656    
657      if (state_offset < 0)      if (state_offset < 0)
658        {        {
# Line 646  for (;;) Line 661  for (;;)
661          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
662          ADD_NEW_DATA(state_offset, current_state->count,          ADD_NEW_DATA(state_offset, current_state->count,
663            current_state->data - 1);            current_state->data - 1);
664            if (could_continue) reset_could_continue = TRUE;
665          continue;          continue;
666          }          }
667        else        else
# Line 685  for (;;) Line 701  for (;;)
701      permitted.      permitted.
702    
703      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
704      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
705      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
706      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
707      */      these ones to new opcodes. */
708    
709      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
710        {        {
711        dlen = 1;        dlen = 1;
712  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
713        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
714  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
715        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
716        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
717          {          {
# Line 779  for (;;) Line 795  for (;;)
795              offsets[0] = (int)(current_subject - start_subject);              offsets[0] = (int)(current_subject - start_subject);
796              offsets[1] = (int)(ptr - start_subject);              offsets[1] = (int)(ptr - start_subject);
797              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
798                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
799              }              }
800            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
801              {              {
# Line 816  for (;;) Line 832  for (;;)
832        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
833        case OP_CBRA:        case OP_CBRA:
834        case OP_SCBRA:        case OP_SCBRA:
835        ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE),  0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
836        code += GET(code, 1);        code += GET(code, 1);
837        while (*code == OP_ALT)        while (*code == OP_ALT)
838          {          {
# Line 884  for (;;) Line 900  for (;;)
900        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
901        case OP_ANY:        case OP_ANY:
902        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
903          { ADD_NEW(state_offset + 1, 0); }          {
904            if (ptr + 1 >= md->end_subject &&
905                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
906                NLBLOCK->nltype == NLTYPE_FIXED &&
907                NLBLOCK->nllen == 2 &&
908                c == NLBLOCK->nl[0])
909              {
910              could_continue = partial_newline = TRUE;
911              }
912            else
913              {
914              ADD_NEW(state_offset + 1, 0);
915              }
916            }
917        break;        break;
918    
919        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 912  for (;;) Line 941  for (;;)
941                 (ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
942              ))              ))
943            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
944            else if (ptr + 1 >= md->end_subject &&
945                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
946                     NLBLOCK->nltype == NLTYPE_FIXED &&
947                     NLBLOCK->nllen == 2 &&
948                     c == NLBLOCK->nl[0])
949              {
950              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
951                {
952                reset_could_continue = TRUE;
953                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
954                }
955              else could_continue = partial_newline = TRUE;
956              }
957          }          }
958        break;        break;
959    
# Line 924  for (;;) Line 966  for (;;)
966          else if (clen == 0 ||          else if (clen == 0 ||
967              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
968            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
969            else if (ptr + 1 >= md->end_subject &&
970                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
971                     NLBLOCK->nltype == NLTYPE_FIXED &&
972                     NLBLOCK->nllen == 2 &&
973                     c == NLBLOCK->nl[0])
974              {
975              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
976                {
977                reset_could_continue = TRUE;
978                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
979                }
980              else could_continue = partial_newline = TRUE;
981              }
982          }          }
983        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
984          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 956  for (;;) Line 1011  for (;;)
1011    
1012          if (ptr > start_subject)          if (ptr > start_subject)
1013            {            {
1014            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1015            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1016  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1017            if (utf8) BACKCHAR(temp);            if (utf) { BACKCHAR(temp); }
1018  #endif  #endif
1019            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
1020  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1011  for (;;) Line 1066  for (;;)
1066        if (clen > 0)        if (clen > 0)
1067          {          {
1068          BOOL OK;          BOOL OK;
1069            const pcre_uint32 *cp;
1070          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1071          switch(code[1])          switch(code[1])
1072            {            {
# Line 1024  for (;;) Line 1080  for (;;)
1080            break;            break;
1081    
1082            case PT_GC:            case PT_GC:
1083            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1084            break;            break;
1085    
1086            case PT_PC:            case PT_PC:
# Line 1038  for (;;) Line 1094  for (;;)
1094            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1095    
1096            case PT_ALNUM:            case PT_ALNUM:
1097            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1098                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1099            break;            break;
1100    
1101              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1102              which means that Perl space and POSIX space are now identical. PCRE
1103              was changed at release 8.34. */
1104    
1105            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||  
                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;  
           break;  
   
1106            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1107            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1108                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1109                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1110            break;            break;
1111    
1112            case PT_WORD:            case PT_WORD:
1113            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1114                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1115                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1116            break;            break;
1117    
1118              case PT_CLIST:
1119              cp = PRIV(ucd_caseless_sets) + code[2];
1120              for (;;)
1121                {
1122                if (c < *cp) { OK = FALSE; break; }
1123                if (c == *cp++) { OK = TRUE; break; }
1124                }
1125              break;
1126    
1127              case PT_UCNC:
1128              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1129                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1130                   c >= 0xe000;
1131              break;
1132    
1133            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1134    
1135            default:            default:
# Line 1086  for (;;) Line 1157  for (;;)
1157        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1158        if (clen > 0)        if (clen > 0)
1159          {          {
1160          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1161                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1162                NLBLOCK->nltype == NLTYPE_FIXED &&
1163                NLBLOCK->nllen == 2 &&
1164                c == NLBLOCK->nl[0])
1165              {
1166              could_continue = partial_newline = TRUE;
1167              }
1168            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1169              (c < 256 &&              (c < 256 &&
1170                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1171                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1109  for (;;) Line 1188  for (;;)
1188        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1189        if (clen > 0)        if (clen > 0)
1190          {          {
1191          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1192                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1193                NLBLOCK->nltype == NLTYPE_FIXED &&
1194                NLBLOCK->nllen == 2 &&
1195                c == NLBLOCK->nl[0])
1196              {
1197              could_continue = partial_newline = TRUE;
1198              }
1199            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1200              (c < 256 &&              (c < 256 &&
1201                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1202                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1131  for (;;) Line 1218  for (;;)
1218        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1219        if (clen > 0)        if (clen > 0)
1220          {          {
1221          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1222                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1223                NLBLOCK->nltype == NLTYPE_FIXED &&
1224                NLBLOCK->nllen == 2 &&
1225                c == NLBLOCK->nl[0])
1226              {
1227              could_continue = partial_newline = TRUE;
1228              }
1229            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1230              (c < 256 &&              (c < 256 &&
1231                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1232                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1151  for (;;) Line 1246  for (;;)
1246        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1247        if (clen > 0)        if (clen > 0)
1248          {          {
1249          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1250                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1251                NLBLOCK->nltype == NLTYPE_FIXED &&
1252                NLBLOCK->nllen == 2 &&
1253                c == NLBLOCK->nl[0])
1254              {
1255              could_continue = partial_newline = TRUE;
1256              }
1257            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1258              (c < 256 &&              (c < 256 &&
1259                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1260                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1261            {            {
1262            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1263              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1264            else            else
1265              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1266            }            }
# Line 1168  for (;;) Line 1271  for (;;)
1271        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1272        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1273        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1274        ADD_ACTIVE(state_offset + 4, 0);        ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1275        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1276        if (clen > 0)        if (clen > 0)
1277          {          {
1278          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1279                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1280                NLBLOCK->nltype == NLTYPE_FIXED &&
1281                NLBLOCK->nllen == 2 &&
1282                c == NLBLOCK->nl[0])
1283              {
1284              could_continue = partial_newline = TRUE;
1285              }
1286            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1287              (c < 256 &&              (c < 256 &&
1288                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1289                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1182  for (;;) Line 1293  for (;;)
1293              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1294              next_active_state--;              next_active_state--;
1295              }              }
1296            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1297              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1298            else            else
1299              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1300            }            }
# Line 1205  for (;;) Line 1316  for (;;)
1316        if (clen > 0)        if (clen > 0)
1317          {          {
1318          BOOL OK;          BOOL OK;
1319            const pcre_uint32 *cp;
1320          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1321          switch(code[2])          switch(code[2])
1322            {            {
# Line 1218  for (;;) Line 1330  for (;;)
1330            break;            break;
1331    
1332            case PT_GC:            case PT_GC:
1333            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1334            break;            break;
1335    
1336            case PT_PC:            case PT_PC:
# Line 1232  for (;;) Line 1344  for (;;)
1344            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1345    
1346            case PT_ALNUM:            case PT_ALNUM:
1347            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1348                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1349            break;            break;
1350    
1351              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1352              which means that Perl space and POSIX space are now identical. PCRE
1353              was changed at release 8.34. */
1354    
1355            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||  
                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;  
           break;  
   
1356            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1357            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1358                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1359                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1360            break;            break;
1361    
1362            case PT_WORD:            case PT_WORD:
1363            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1364                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1365                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1366            break;            break;
1367    
1368              case PT_CLIST:
1369              cp = PRIV(ucd_caseless_sets) + code[3];
1370              for (;;)
1371                {
1372                if (c < *cp) { OK = FALSE; break; }
1373                if (c == *cp++) { OK = TRUE; break; }
1374                }
1375              break;
1376    
1377              case PT_UCNC:
1378              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1379                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1380                   c >= 0xe000;
1381              break;
1382    
1383            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1384    
1385            default:            default:
# Line 1279  for (;;) Line 1406  for (;;)
1406        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1407        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1408        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1409        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1410          {          {
1411          const uschar *nptr = ptr + clen;          int lgb, rgb;
1412            const pcre_uchar *nptr = ptr + clen;
1413          int ncount = 0;          int ncount = 0;
1414          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1415            {            {
1416            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1417            next_active_state--;            next_active_state--;
1418            }            }
1419            lgb = UCD_GRAPHBREAK(c);
1420          while (nptr < end_subject)          while (nptr < end_subject)
1421            {            {
1422            int nd;            dlen = 1;
1423            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1424            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1425            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1426            ncount++;            ncount++;
1427            nptr += ndlen;            lgb = rgb;
1428              nptr += dlen;
1429            }            }
1430          count++;          count++;
1431          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
# Line 1314  for (;;) Line 1444  for (;;)
1444          int ncount = 0;          int ncount = 0;
1445          switch (c)          switch (c)
1446            {            {
1447            case 0x000b:            case CHAR_VT:
1448            case 0x000c:            case CHAR_FF:
1449            case 0x0085:            case CHAR_NEL:
1450    #ifndef EBCDIC
1451            case 0x2028:            case 0x2028:
1452            case 0x2029:            case 0x2029:
1453    #endif  /* Not EBCDIC */
1454            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1455            goto ANYNL01;            goto ANYNL01;
1456    
1457            case 0x000d:            case CHAR_CR:
1458            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1459            /* Fall through */            /* Fall through */
1460    
1461            ANYNL01:            ANYNL01:
1462            case 0x000a:            case CHAR_LF:
1463            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1464              {              {
1465              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1354  for (;;) Line 1486  for (;;)
1486          BOOL OK;          BOOL OK;
1487          switch (c)          switch (c)
1488            {            {
1489            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
1490            OK = TRUE;            OK = TRUE;
1491            break;            break;
1492    
# Line 1393  for (;;) Line 1519  for (;;)
1519          BOOL OK;          BOOL OK;
1520          switch (c)          switch (c)
1521            {            {
1522            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
1523            OK = TRUE;            OK = TRUE;
1524            break;            break;
1525    
# Line 1452  for (;;) Line 1560  for (;;)
1560        if (clen > 0)        if (clen > 0)
1561          {          {
1562          BOOL OK;          BOOL OK;
1563            const pcre_uint32 *cp;
1564          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1565          switch(code[2])          switch(code[2])
1566            {            {
# Line 1465  for (;;) Line 1574  for (;;)
1574            break;            break;
1575    
1576            case PT_GC:            case PT_GC:
1577            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1578            break;            break;
1579    
1580            case PT_PC:            case PT_PC:
# Line 1479  for (;;) Line 1588  for (;;)
1588            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1589    
1590            case PT_ALNUM:            case PT_ALNUM:
1591            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1592                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1593            break;            break;
1594    
1595              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1596              which means that Perl space and POSIX space are now identical. PCRE
1597              was changed at release 8.34. */
1598    
1599            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||  
                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;  
           break;  
   
1600            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1601            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1602                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1603                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1604            break;            break;
1605    
1606            case PT_WORD:            case PT_WORD:
1607            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1608                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1609                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1610            break;            break;
1611    
1612              case PT_CLIST:
1613              cp = PRIV(ucd_caseless_sets) + code[3];
1614              for (;;)
1615                {
1616                if (c < *cp) { OK = FALSE; break; }
1617                if (c == *cp++) { OK = TRUE; break; }
1618                }
1619              break;
1620    
1621              case PT_UCNC:
1622              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1623                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1624                   c >= 0xe000;
1625              break;
1626    
1627            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1628    
1629            default:            default:
# Line 1535  for (;;) Line 1659  for (;;)
1659        QS2:        QS2:
1660    
1661        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1662        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1663          {          {
1664          const uschar *nptr = ptr + clen;          int lgb, rgb;
1665            const pcre_uchar *nptr = ptr + clen;
1666          int ncount = 0;          int ncount = 0;
1667          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1668              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1545  for (;;) Line 1670  for (;;)
1670            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1671            next_active_state--;            next_active_state--;
1672            }            }
1673            lgb = UCD_GRAPHBREAK(c);
1674          while (nptr < end_subject)          while (nptr < end_subject)
1675            {            {
1676            int nd;            dlen = 1;
1677            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1678            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1679            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1680            ncount++;            ncount++;
1681            nptr += ndlen;            lgb = rgb;
1682              nptr += dlen;
1683            }            }
1684          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1685          }          }
# Line 1578  for (;;) Line 1705  for (;;)
1705          int ncount = 0;          int ncount = 0;
1706          switch (c)          switch (c)
1707            {            {
1708            case 0x000b:            case CHAR_VT:
1709            case 0x000c:            case CHAR_FF:
1710            case 0x0085:            case CHAR_NEL:
1711    #ifndef EBCDIC
1712            case 0x2028:            case 0x2028:
1713            case 0x2029:            case 0x2029:
1714    #endif  /* Not EBCDIC */
1715            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1716            goto ANYNL02;            goto ANYNL02;
1717    
1718            case 0x000d:            case CHAR_CR:
1719            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1720            /* Fall through */            /* Fall through */
1721    
1722            ANYNL02:            ANYNL02:
1723            case 0x000a:            case CHAR_LF:
1724            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1725                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1726              {              {
1727              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1728              next_active_state--;              next_active_state--;
1729              }              }
1730            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
1731            break;            break;
1732    
1733            default:            default:
# Line 1626  for (;;) Line 1755  for (;;)
1755          BOOL OK;          BOOL OK;
1756          switch (c)          switch (c)
1757            {            {
1758            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
1759            OK = TRUE;            OK = TRUE;
1760            break;            break;
1761    
# Line 1648  for (;;) Line 1771  for (;;)
1771              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1772              next_active_state--;              next_active_state--;
1773              }              }
1774            ADD_NEW_DATA(-(state_offset + count), 0, 0);            ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1775            }            }
1776          }          }
1777        break;        break;
# Line 1672  for (;;) Line 1795  for (;;)
1795          BOOL OK;          BOOL OK;
1796          switch (c)          switch (c)
1797            {            {
1798            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
1799            OK = TRUE;            OK = TRUE;
1800            break;            break;
1801    
# Line 1707  for (;;) Line 1812  for (;;)
1812              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1813              next_active_state--;              next_active_state--;
1814              }              }
1815            ADD_NEW_DATA(-(state_offset + count), 0, 0);            ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1816            }            }
1817          }          }
1818        break;        break;
# Line 1719  for (;;) Line 1824  for (;;)
1824        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1825        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1826        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1827          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1828        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1829        if (clen > 0)        if (clen > 0)
1830          {          {
1831          BOOL OK;          BOOL OK;
1832            const pcre_uint32 *cp;
1833          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1834          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1835            {            {
1836            case PT_ANY:            case PT_ANY:
1837            OK = TRUE;            OK = TRUE;
# Line 1737  for (;;) Line 1843  for (;;)
1843            break;            break;
1844    
1845            case PT_GC:            case PT_GC:
1846            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1847            break;            break;
1848    
1849            case PT_PC:            case PT_PC:
1850            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1851            break;            break;
1852    
1853            case PT_SC:            case PT_SC:
1854            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1855            break;            break;
1856    
1857            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1858    
1859            case PT_ALNUM:            case PT_ALNUM:
1860            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1861                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1862            break;            break;
1863    
1864              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1865              which means that Perl space and POSIX space are now identical. PCRE
1866              was changed at release 8.34. */
1867    
1868            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||  
                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;  
           break;  
   
1869            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1870            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1871                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1872                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1873            break;            break;
1874    
1875            case PT_WORD:            case PT_WORD:
1876            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1877                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1878                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1879            break;            break;
1880    
1881              case PT_CLIST:
1882              cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
1883              for (;;)
1884                {
1885                if (c < *cp) { OK = FALSE; break; }
1886                if (c == *cp++) { OK = TRUE; break; }
1887                }
1888              break;
1889    
1890              case PT_UCNC:
1891              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1892                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1893                   c >= 0xe000;
1894              break;
1895    
1896            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1897    
1898            default:            default:
# Line 1786  for (;;) Line 1907  for (;;)
1907              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1908              next_active_state--;              next_active_state--;
1909              }              }
1910            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1911              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1912            else            else
1913              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1914            }            }
# Line 1800  for (;;) Line 1921  for (;;)
1921        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1922        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1923        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1924          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1925        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1926        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1927          {          {
1928          const uschar *nptr = ptr + clen;          int lgb, rgb;
1929            const pcre_uchar *nptr = ptr + clen;
1930          int ncount = 0;          int ncount = 0;
1931          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1932            {            {
1933            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1934            next_active_state--;            next_active_state--;
1935            }            }
1936            lgb = UCD_GRAPHBREAK(c);
1937          while (nptr < end_subject)          while (nptr < end_subject)
1938            {            {
1939            int nd;            dlen = 1;
1940            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1941            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1942            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1943            ncount++;            ncount++;
1944            nptr += ndlen;            lgb = rgb;
1945              nptr += dlen;
1946            }            }
1947          if (++count >= GET2(code, 1))          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1948            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              reset_could_continue = TRUE;
1949            if (++count >= (int)GET2(code, 1))
1950              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1951          else          else
1952            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1953          }          }
# Line 1834  for (;;) Line 1960  for (;;)
1960        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1961        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1962        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1963          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1964        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1965        if (clen > 0)        if (clen > 0)
1966          {          {
1967          int ncount = 0;          int ncount = 0;
1968          switch (c)          switch (c)
1969            {            {
1970            case 0x000b:            case CHAR_VT:
1971            case 0x000c:            case CHAR_FF:
1972            case 0x0085:            case CHAR_NEL:
1973    #ifndef EBCDIC
1974            case 0x2028:            case 0x2028:
1975            case 0x2029:            case 0x2029:
1976    #endif  /* Not EBCDIC */
1977            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1978            goto ANYNL03;            goto ANYNL03;
1979    
1980            case 0x000d:            case CHAR_CR:
1981            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1982            /* Fall through */            /* Fall through */
1983    
1984            ANYNL03:            ANYNL03:
1985            case 0x000a:            case CHAR_LF:
1986            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1987              {              {
1988              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1989              next_active_state--;              next_active_state--;
1990              }              }
1991            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1992              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1993            else            else
1994              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1995            break;            break;
# Line 1878  for (;;) Line 2006  for (;;)
2006        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
2007        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
2008        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
2009          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2010        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2011        if (clen > 0)        if (clen > 0)
2012          {          {
2013          BOOL OK;          BOOL OK;
2014          switch (c)          switch (c)
2015            {            {
2016            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
2017            OK = TRUE;            OK = TRUE;
2018            break;            break;
2019    
# Line 1906  for (;;) Line 2028  for (;;)
2028              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2029              next_active_state--;              next_active_state--;
2030              }              }
2031            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2032              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2033            else            else
2034              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
2035            }            }
# Line 1920  for (;;) Line 2042  for (;;)
2042        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2043        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2044        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2045          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2046        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2047        if (clen > 0)        if (clen > 0)
2048          {          {
2049          BOOL OK;          BOOL OK;
2050          switch (c)          switch (c)
2051            {            {
2052            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
2053            OK = TRUE;            OK = TRUE;
2054            break;            break;
2055    
# Line 1961  for (;;) Line 2065  for (;;)
2065              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2066              next_active_state--;              next_active_state--;
2067              }              }
2068            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2069              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2070            else            else
2071              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
2072            }            }
# Line 1984  for (;;) Line 2088  for (;;)
2088        case OP_CHARI:        case OP_CHARI:
2089        if (clen == 0) break;        if (clen == 0) break;
2090    
2091  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2092        if (utf8)        if (utf)
2093          {          {
2094          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2095            {            {
2096            unsigned int othercase;            unsigned int othercase;
2097            if (c < 128) othercase = fcc[c]; else            if (c < 128)
2098                othercase = fcc[c];
2099            /* If we have Unicode property support, we can use it to test the            else
2100            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2101                other case of the character. */
2102  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2103            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2104  #else  #else
2105            othercase = NOTACHAR;              othercase = NOTACHAR;
2106  #endif  #endif
2107    
2108            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2109            }            }
2110          }          }
2111        else        else
2112  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2113          /* Not UTF mode */
       /* Non-UTF-8 mode */  
2114          {          {
2115          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2116              { ADD_NEW(state_offset + 2, 0); }
2117          }          }
2118        break;        break;
2119    
# Line 2021  for (;;) Line 2125  for (;;)
2125        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2126    
2127        case OP_EXTUNI:        case OP_EXTUNI:
2128        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2129          {          {
2130          const uschar *nptr = ptr + clen;          int lgb, rgb;
2131            const pcre_uchar *nptr = ptr + clen;
2132          int ncount = 0;          int ncount = 0;
2133            lgb = UCD_GRAPHBREAK(c);
2134          while (nptr < end_subject)          while (nptr < end_subject)
2135            {            {
2136            int nclen = 1;            dlen = 1;
2137            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2138            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2139              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2140            ncount++;            ncount++;
2141            nptr += nclen;            lgb = rgb;
2142              nptr += dlen;
2143            }            }
2144            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2145                reset_could_continue = TRUE;
2146          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2147          }          }
2148        break;        break;
# Line 2046  for (;;) Line 2156  for (;;)
2156        case OP_ANYNL:        case OP_ANYNL:
2157        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2158          {          {
2159          case 0x000b:          case CHAR_VT:
2160          case 0x000c:          case CHAR_FF:
2161          case 0x0085:          case CHAR_NEL:
2162    #ifndef EBCDIC
2163          case 0x2028:          case 0x2028:
2164          case 0x2029:          case 0x2029:
2165    #endif  /* Not EBCDIC */
2166          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2167    
2168          case 0x000a:          case CHAR_LF:
2169          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2170          break;          break;
2171    
2172          case 0x000d:          case CHAR_CR:
2173          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2174              {
2175              ADD_NEW(state_offset + 1, 0);
2176              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2177                reset_could_continue = TRUE;
2178              }
2179            else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
2180            {            {
2181            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2182            }            }
# Line 2074  for (;;) Line 2192  for (;;)
2192        case OP_NOT_VSPACE:        case OP_NOT_VSPACE:
2193        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2194          {          {
2195          case 0x000a:          VSPACE_CASES:
         case 0x000b:  
         case 0x000c:  
         case 0x000d:  
         case 0x0085:  
         case 0x2028:  
         case 0x2029:  
2196          break;          break;
2197    
2198          default:          default:
# Line 2093  for (;;) Line 2205  for (;;)
2205        case OP_VSPACE:        case OP_VSPACE:
2206        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2207          {          {
2208          case 0x000a:          VSPACE_CASES:
         case 0x000b:  
         case 0x000c:  
         case 0x000d:  
         case 0x0085:  
         case 0x2028:  
         case 0x2029:  
2209          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2210          break;          break;
2211    
2212          default: break;          default:
2213            break;
2214          }          }
2215        break;        break;
2216    
# Line 2111  for (;;) Line 2218  for (;;)
2218        case OP_NOT_HSPACE:        case OP_NOT_HSPACE:
2219        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2220          {          {
2221          case 0x09:      /* HT */          HSPACE_CASES:
         case 0x20:      /* SPACE */  
         case 0xa0:      /* NBSP */  
         case 0x1680:    /* OGHAM SPACE MARK */  
         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
         case 0x2000:    /* EN QUAD */  
         case 0x2001:    /* EM QUAD */  
         case 0x2002:    /* EN SPACE */  
         case 0x2003:    /* EM SPACE */  
         case 0x2004:    /* THREE-PER-EM SPACE */  
         case 0x2005:    /* FOUR-PER-EM SPACE */  
         case 0x2006:    /* SIX-PER-EM SPACE */  
         case 0x2007:    /* FIGURE SPACE */  
         case 0x2008:    /* PUNCTUATION SPACE */  
         case 0x2009:    /* THIN SPACE */  
         case 0x200A:    /* HAIR SPACE */  
         case 0x202f:    /* NARROW NO-BREAK SPACE */  
         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
         case 0x3000:    /* IDEOGRAPHIC SPACE */  
2222          break;          break;
2223    
2224          default:          default:
# Line 2142  for (;;) Line 2231  for (;;)
2231        case OP_HSPACE:        case OP_HSPACE:
2232        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2233          {          {
2234          case 0x09:      /* HT */          HSPACE_CASES:
         case 0x20:      /* SPACE */  
         case 0xa0:      /* NBSP */  
         case 0x1680:    /* OGHAM SPACE MARK */  
         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
         case 0x2000:    /* EN QUAD */  
         case 0x2001:    /* EM QUAD */  
         case 0x2002:    /* EN SPACE */  
         case 0x2003:    /* EM SPACE */  
         case 0x2004:    /* THREE-PER-EM SPACE */  
         case 0x2005:    /* FOUR-PER-EM SPACE */  
         case 0x2006:    /* SIX-PER-EM SPACE */  
         case 0x2007:    /* FIGURE SPACE */  
         case 0x2008:    /* PUNCTUATION SPACE */  
         case 0x2009:    /* THIN SPACE */  
         case 0x200A:    /* HAIR SPACE */  
         case 0x202f:    /* NARROW NO-BREAK SPACE */  
         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
         case 0x3000:    /* IDEOGRAPHIC SPACE */  
2235          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2236          break;          break;
2237    
2238            default:
2239            break;
2240          }          }
2241        break;        break;
2242    
2243        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2244        /* Match a negated single character casefully. This is only used for        /* Match a negated single character casefully. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2245    
2246        case OP_NOT:        case OP_NOT:
2247        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2248        break;        break;
2249    
2250        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2251        /* Match a negated single character caselessly. This is only used for        /* Match a negated single character caselessly. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2252    
2253        case OP_NOTI:        case OP_NOTI:
2254        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0)
2255          { ADD_NEW(state_offset + dlen + 1, 0); }          {
2256            unsigned int otherd;
2257    #ifdef SUPPORT_UTF
2258            if (utf && d >= 128)
2259              {
2260    #ifdef SUPPORT_UCP
2261              otherd = UCD_OTHERCASE(d);
2262    #endif  /* SUPPORT_UCP */
2263              }
2264            else
2265    #endif  /* SUPPORT_UTF */
2266            otherd = TABLE_GET(d, fcc, d);
2267            if (c != d && c != otherd)
2268              { ADD_NEW(state_offset + dlen + 1, 0); }
2269            }
2270        break;        break;
2271    
2272        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 2206  for (;;) Line 2290  for (;;)
2290        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2291        if (clen > 0)        if (clen > 0)
2292          {          {
2293          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2294          if (caseless)          if (caseless)
2295            {            {
2296  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2297            if (utf8 && d >= 128)            if (utf && d >= 128)
2298              {              {
2299  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2300              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2301  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2302              }              }
2303            else            else
2304  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2305            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2306            }            }
2307          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2308            {            {
# Line 2253  for (;;) Line 2337  for (;;)
2337        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2338        if (clen > 0)        if (clen > 0)
2339          {          {
2340          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2341          if (caseless)          if (caseless)
2342            {            {
2343  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2344            if (utf8 && d >= 128)            if (utf && d >= 128)
2345              {              {
2346  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2347              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2348  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2349              }              }
2350            else            else
2351  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2352            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2353            }            }
2354          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2355            {            {
# Line 2298  for (;;) Line 2382  for (;;)
2382        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2383        if (clen > 0)        if (clen > 0)
2384          {          {
2385          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2386          if (caseless)          if (caseless)
2387            {            {
2388  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2389            if (utf8 && d >= 128)            if (utf && d >= 128)
2390              {              {
2391  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2392              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2393  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2394              }              }
2395            else            else
2396  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2397            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2398            }            }
2399          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2400            {            {
# Line 2335  for (;;) Line 2419  for (;;)
2419        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2420        if (clen > 0)        if (clen > 0)
2421          {          {
2422          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2423          if (caseless)          if (caseless)
2424            {            {
2425  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2426            if (utf8 && d >= 128)            if (utf && d >= 128)
2427              {              {
2428  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2429              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2430  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2431              }              }
2432            else            else
2433  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2434            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2435            }            }
2436          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2437            {            {
2438            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2439              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2440            else            else
2441              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2442            }            }
# Line 2375  for (;;) Line 2459  for (;;)
2459        case OP_NOTUPTO:        case OP_NOTUPTO:
2460        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2461        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2462        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2463        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2464        if (clen > 0)        if (clen > 0)
2465          {          {
2466          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2467          if (caseless)          if (caseless)
2468            {            {
2469  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2470            if (utf8 && d >= 128)            if (utf && d >= 128)
2471              {              {
2472  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2473              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2474  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2475              }              }
2476            else            else
2477  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2478            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2479            }            }
2480          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2481            {            {
# Line 2400  for (;;) Line 2484  for (;;)
2484              active_count--;             /* Remove non-match possibility */              active_count--;             /* Remove non-match possibility */
2485              next_active_state--;              next_active_state--;
2486              }              }
2487            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2488              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2489            else            else
2490              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2491            }            }
# Line 2418  for (;;) Line 2502  for (;;)
2502          {          {
2503          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2504          int next_state_offset;          int next_state_offset;
2505          const uschar *ecode;          const pcre_uchar *ecode;
2506    
2507          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2508          can set isinclass from it. */          can set isinclass from it. */
2509    
2510          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2511            {            {
2512            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2513            if (clen > 0)            if (clen > 0)
2514              {              {
2515              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2516                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2517              }              }
2518            }            }
2519    
# Line 2440  for (;;) Line 2524  for (;;)
2524          else          else
2525           {           {
2526           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2527           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2528           }           }
2529    
2530          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
# Line 2473  for (;;) Line 2557  for (;;)
2557            case OP_CRRANGE:            case OP_CRRANGE:
2558            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2559            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2560            if (count >= GET2(ecode, 1))            if (count >= (int)GET2(ecode, 1))
2561              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2562            if (isinclass)            if (isinclass)
2563              {              {
2564              int max = GET2(ecode, 3);              int max = (int)GET2(ecode, 1 + IMM2_SIZE);
2565              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2566                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2567              else              else
2568                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
2569              }              }
# Line 2510  for (;;) Line 2594  for (;;)
2594          int rc;          int rc;
2595          int local_offsets[2];          int local_offsets[2];
2596          int local_workspace[1000];          int local_workspace[1000];
2597          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2598    
2599          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2600    
# Line 2547  for (;;) Line 2631  for (;;)
2631          if (code[LINK_SIZE+1] == OP_CALLOUT)          if (code[LINK_SIZE+1] == OP_CALLOUT)
2632            {            {
2633            rrc = 0;            rrc = 0;
2634            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2635              {              {
2636              pcre_callout_block cb;              PUBL(callout_block) cb;
2637              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2638              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2639              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2640    #if defined COMPILE_PCRE8
2641              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2642    #elif defined COMPILE_PCRE16
2643                cb.subject          = (PCRE_SPTR16)start_subject;
2644    #elif defined COMPILE_PCRE32
2645                cb.subject          = (PCRE_SPTR32)start_subject;
2646    #endif
2647              cb.subject_length   = (int)(end_subject - start_subject);              cb.subject_length   = (int)(end_subject - start_subject);
2648              cb.start_match      = (int)(current_subject - start_subject);              cb.start_match      = (int)(current_subject - start_subject);
2649              cb.current_position = (int)(ptr - start_subject);              cb.current_position = (int)(ptr - start_subject);
# Line 2563  for (;;) Line 2653  for (;;)
2653              cb.capture_last     = -1;              cb.capture_last     = -1;
2654              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2655              cb.mark             = NULL;   /* No (*MARK) support */              cb.mark             = NULL;   /* No (*MARK) support */
2656              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2657              }              }
2658            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2659            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2660            }            }
2661    
2662          condcode = code[LINK_SIZE+1];          condcode = code[LINK_SIZE+1];
# Line 2587  for (;;) Line 2677  for (;;)
2677    
2678          else if (condcode == OP_RREF || condcode == OP_NRREF)          else if (condcode == OP_RREF || condcode == OP_NRREF)
2679            {            {
2680            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2681            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2682            if (md->recursive != NULL)            if (md->recursive != NULL)
2683              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2684            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2685            }            }
2686    
# Line 2599  for (;;) Line 2689  for (;;)
2689          else          else
2690            {            {
2691            int rc;            int rc;
2692            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2693            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2694    
2695            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2696    
# Line 2631  for (;;) Line 2721  for (;;)
2721          dfa_recursion_info *ri;          dfa_recursion_info *ri;
2722          int local_offsets[1000];          int local_offsets[1000];
2723          int local_workspace[1000];          int local_workspace[1000];
2724          const uschar *callpat = start_code + GET(code, 1);          const pcre_uchar *callpat = start_code + GET(code, 1);
2725          int recno = (callpat == md->start_code)? 0 :          int recno = (callpat == md->start_code)? 0 :
2726            GET2(callpat, 1 + LINK_SIZE);            GET2(callpat, 1 + LINK_SIZE);
2727          int rc;          int rc;
# Line 2682  for (;;) Line 2772  for (;;)
2772            {            {
2773            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2774              {              {
             const uschar *p = start_subject + local_offsets[rc];  
             const uschar *pp = start_subject + local_offsets[rc+1];  
2775              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2776              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2777                if (utf)
2778                  {
2779                  const pcre_uchar *p = start_subject + local_offsets[rc];
2780                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2781                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2782                  }
2783    #endif
2784              if (charcount > 0)              if (charcount > 0)
2785                {                {
2786                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
# Line 2708  for (;;) Line 2803  for (;;)
2803        case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
2804          {          {
2805          int charcount, matched_count;          int charcount, matched_count;
2806          const uschar *local_ptr = ptr;          const pcre_uchar *local_ptr = ptr;
2807          BOOL allow_zero;          BOOL allow_zero;
2808    
2809          if (codevalue == OP_BRAPOSZERO)          if (codevalue == OP_BRAPOSZERO)
# Line 2758  for (;;) Line 2853  for (;;)
2853    
2854          if (matched_count > 0 || allow_zero)          if (matched_count > 0 || allow_zero)
2855            {            {
2856            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2857            int next_state_offset;            int next_state_offset;
2858    
2859            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
# Line 2779  for (;;) Line 2874  for (;;)
2874              }              }
2875            else            else
2876              {              {
2877              const uschar *p = ptr;              const pcre_uchar *p = ptr;
2878              const uschar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2879              charcount = pp - p;              charcount = (int)(pp - p);
2880              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2881                if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2882    #endif
2883              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2884              }              }
2885            }            }
# Line 2791  for (;;) Line 2888  for (;;)
2888    
2889        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2890        case OP_ONCE:        case OP_ONCE:
2891        case OP_ONCE_NC:        case OP_ONCE_NC:
2892          {          {
2893          int local_offsets[2];          int local_offsets[2];
2894          int local_workspace[1000];          int local_workspace[1000];
# Line 2809  for (;;) Line 2906  for (;;)
2906    
2907          if (rc >= 0)          if (rc >= 0)
2908            {            {
2909            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2910            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2911            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2912    
# Line 2862  for (;;) Line 2959  for (;;)
2959              }              }
2960            else            else
2961              {              {
2962              const uschar *p = start_subject + local_offsets[0];  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2963              const uschar *pp = start_subject + local_offsets[1];              if (utf)
2964              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;                {
2965                  const pcre_uchar *p = start_subject + local_offsets[0];
2966                  const pcre_uchar *pp = start_subject + local_offsets[1];
2967                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2968                  }
2969    #endif
2970              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2971              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
2972                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
# Line 2880  for (;;) Line 2982  for (;;)
2982    
2983        case OP_CALLOUT:        case OP_CALLOUT:
2984        rrc = 0;        rrc = 0;
2985        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
2986          {          {
2987          pcre_callout_block cb;          PUBL(callout_block) cb;
2988          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2989          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2990          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2991    #if defined COMPILE_PCRE8
2992          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2993    #elif defined COMPILE_PCRE16
2994            cb.subject          = (PCRE_SPTR16)start_subject;
2995    #elif defined COMPILE_PCRE32
2996            cb.subject          = (PCRE_SPTR32)start_subject;
2997    #endif
2998          cb.subject_length   = (int)(end_subject - start_subject);          cb.subject_length   = (int)(end_subject - start_subject);
2999          cb.start_match      = (int)(current_subject - start_subject);          cb.start_match      = (int)(current_subject - start_subject);
3000          cb.current_position = (int)(ptr - start_subject);          cb.current_position = (int)(ptr - start_subject);
# Line 2896  for (;;) Line 3004  for (;;)
3004          cb.capture_last     = -1;          cb.capture_last     = -1;
3005          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
3006          cb.mark             = NULL;   /* No (*MARK) support */          cb.mark             = NULL;   /* No (*MARK) support */
3007          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
3008          }          }
3009        if (rrc == 0)        if (rrc == 0)
3010          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3011        break;        break;
3012    
3013    
# Line 2928  for (;;) Line 3036  for (;;)
3036    if (new_count <= 0)    if (new_count <= 0)
3037      {      {
3038      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3039          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3040          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3041          (                                            /* either... */          (                                            /* either... */
3042          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2936  for (;;) Line 3044  for (;;)
3044          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3045           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3046          ) &&                                         /* And... */          ) &&                                         /* And... */
3047          ptr >= end_subject &&                  /* Reached end of subject */          (
3048          ptr > md->start_used_ptr)              /* Inspected non-empty string */          partial_newline ||                           /* Either partial NL */
3049        {            (                                          /* or ... */
3050        if (offsetcount >= 2)            ptr >= end_subject &&                /* End of subject and */
3051          {            ptr > md->start_used_ptr)            /* Inspected non-empty string */
3052          offsets[0] = (int)(md->start_used_ptr - start_subject);            )
3053          offsets[1] = (int)(end_subject - start_subject);          )
         }  
3054        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
       }  
   
3055      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3056        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
3057        rlevel*2-2, SP));        rlevel*2-2, SP));
# Line 2996  Returns:          > 0 => number of match Line 3101  Returns:          > 0 => number of match
3101                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3102  */  */
3103    
3104    #if defined COMPILE_PCRE8
3105  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3106  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3107    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3108    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3109    #elif defined COMPILE_PCRE16
3110    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3111    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3112      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3113      int offsetcount, int *workspace, int wscount)
3114    #elif defined COMPILE_PCRE32
3115    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3116    pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
3117      PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3118      int offsetcount, int *workspace, int wscount)
3119    #endif
3120  {  {
3121  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3122  dfa_match_data match_block;  dfa_match_data match_block;
3123  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3124  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3125  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;
   
 pcre_study_data internal_study;  
3126  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
 real_pcre internal_re;  
3127    
3128  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3129  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3130  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3131  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3132  int first_byte = -1;  pcre_uchar first_char = 0;
3133  int req_byte = -1;  pcre_uchar first_char2 = 0;
3134  int req_byte2 = -1;  pcre_uchar req_char = 0;
3135    pcre_uchar req_char2 = 0;
3136  int newline;  int newline;
3137    
3138  /* Plausibility checks */  /* Plausibility checks */
# Line 3027  if (re == NULL || subject == NULL || wor Line 3142  if (re == NULL || subject == NULL || wor
3142     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3143  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3144  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3145    if (length < 0) return PCRE_ERROR_BADLENGTH;
3146  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3147    
3148  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3149  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3150  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3151  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3152    
3153    if (re->magic_number != MAGIC_NUMBER)
3154      return re->magic_number == REVERSED_MAGIC_NUMBER?
3155        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3156    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3157    
3158    /* If restarting after a partial match, do some sanity checks on the contents
3159    of the workspace. */
3160    
3161    if ((options & PCRE_DFA_RESTART) != 0)
3162      {
3163      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3164        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3165          return PCRE_ERROR_DFA_BADRESTART;
3166      }
3167    
3168    /* Set up study, callout, and table data */
3169    
3170  md->tables = re->tables;  md->tables = re->tables;
3171  md->callout_data = NULL;  md->callout_data = NULL;
# Line 3051  if (extra_data != NULL) Line 3184  if (extra_data != NULL)
3184      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3185    }    }
3186    
 /* Check that the first field in the block is the magic number. If it is not,  
 test for a regex that was compiled on a host of opposite endianness. If this is  
 the case, flipped values are put in internal_re and internal_study if there was  
 study data too. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   {  
   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);  
   if (re == NULL) return PCRE_ERROR_BADMAGIC;  
   if (study != NULL) study = &internal_study;  
   }  
   
3187  /* Set some local values */  /* Set some local values */
3188    
3189  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3190  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3191  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3192    
3193  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3194  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
3195    utf = (re->options & PCRE_UTF8) != 0;
3196  #else  #else
3197  utf8 = FALSE;  utf = FALSE;
3198  #endif  #endif
3199    
3200  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 3080  anchored = (options & (PCRE_ANCHORED|PCR Line 3202  anchored = (options & (PCRE_ANCHORED|PCR
3202    
3203  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3204    
3205  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3206      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3207  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3208  md->end_subject = end_subject;  md->end_subject = end_subject;
3209  md->start_offset = start_offset;  md->start_offset = start_offset;
3210  md->moptions = options;  md->moptions = options;
# Line 3143  else Line 3265  else
3265  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3266  back the character offset. */  back the character offset. */
3267    
3268  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3269  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3270    {    {
3271    int erroroffset;    int erroroffset;
3272    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3273    if (errorcode != 0)    if (errorcode != 0)
3274      {      {
3275      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 3155  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3277  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3277        offsets[0] = erroroffset;        offsets[0] = erroroffset;
3278        offsets[1] = errorcode;        offsets[1] = errorcode;
3279        }        }
3280      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?  #if defined COMPILE_PCRE8
3281        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
3282        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3283    #elif defined COMPILE_PCRE16
3284        return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
3285          PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
3286    #elif defined COMPILE_PCRE32
3287        return PCRE_ERROR_BADUTF32;
3288    #endif
3289      }      }
3290    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3291    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3292          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3293      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
3294    #endif
3295    }    }
3296  #endif  #endif
3297    
# Line 3168  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3299  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3299  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3300  in other programs later. */  in other programs later. */
3301    
3302  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3303    
3304  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3305  used in a loop when finding where to start. */  where to start. */
3306    
 lcc = md->tables + lcc_offset;  
3307  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3308  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3309    
# Line 3187  if (!anchored) Line 3317  if (!anchored)
3317    {    {
3318    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3319      {      {
3320      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3321      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
3322        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3323          {
3324          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3325    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3326          if (utf && first_char > 127)
3327            first_char2 = UCD_OTHERCASE(first_char);
3328    #endif
3329          }
3330      }      }
3331    else    else
3332      {      {
# Line 3204  character" set. */ Line 3341  character" set. */
3341    
3342  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3343    {    {
3344    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3345    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3346    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3347        {
3348        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3349    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3350        if (utf && req_char > 127)
3351          req_char2 = UCD_OTHERCASE(req_char);
3352    #endif
3353        }
3354    }    }
3355    
3356  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 3219  for (;;) Line 3363  for (;;)
3363    
3364    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3365      {      {
3366      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3367    
3368      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3369      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 3228  for (;;) Line 3372  for (;;)
3372    
3373      if (firstline)      if (firstline)
3374        {        {
3375        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3376  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3377        if (utf8)        if (utf)
3378          {          {
3379          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3380            {            {
3381            t++;            t++;
3382            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3383            }            }
3384          }          }
3385        else        else
# Line 3252  for (;;) Line 3396  for (;;)
3396    
3397      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3398        {        {
3399        /* Advance to a known first byte. */        /* Advance to a known first char. */
3400    
3401        if (first_byte >= 0)        if (has_first_char)
3402          {          {
3403          if (first_byte_caseless)          if (first_char != first_char2)
3404              {
3405              pcre_uchar csc;
3406            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3407                   lcc[*current_subject] != first_byte)                   (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
3408              current_subject++;              current_subject++;
3409              }
3410          else          else
3411            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3412                   *current_subject != first_byte)                   RAWUCHARTEST(current_subject) != first_char)
3413              current_subject++;              current_subject++;
3414          }          }
3415    
# Line 3272  for (;;) Line 3419  for (;;)
3419          {          {
3420          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3421            {            {
3422  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3423            if (utf8)            if (utf)
3424              {              {
3425              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3426                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3427                {                {
3428                current_subject++;                current_subject++;
3429                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3430                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
                 current_subject++;  
3431                }                }
3432              }              }
3433            else            else
# Line 3293  for (;;) Line 3439  for (;;)
3439            ANYCRLF, and we are now at a LF, advance the match position by one            ANYCRLF, and we are now at a LF, advance the match position by one
3440            more character. */            more character. */
3441    
3442            if (current_subject[-1] == CHAR_CR &&            if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3443                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3444                 current_subject < end_subject &&                 current_subject < end_subject &&
3445                 *current_subject == CHAR_NL)                 RAWUCHARTEST(current_subject) == CHAR_NL)
3446              current_subject++;              current_subject++;
3447            }            }
3448          }          }
# Line 3307  for (;;) Line 3453  for (;;)
3453          {          {
3454          while (current_subject < end_subject)          while (current_subject < end_subject)
3455            {            {
3456            register unsigned int c = *current_subject;            register pcre_uint32 c = RAWUCHARTEST(current_subject);
3457    #ifndef COMPILE_PCRE8
3458              if (c > 255) c = 255;
3459    #endif
3460            if ((start_bits[c/8] & (1 << (c&7))) == 0)            if ((start_bits[c/8] & (1 << (c&7))) == 0)
3461              {              {
3462              current_subject++;              current_subject++;
3463  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3464              if (utf8)              /* In non 8-bit mode, the iteration will stop for
3465                while(current_subject < end_subject &&              characters > 255 at the beginning or not stop at all. */
3466                      (*current_subject & 0xc0) == 0x80) current_subject++;              if (utf)
3467                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3468                    current_subject++);
3469  #endif  #endif
3470              }              }
3471            else break;            else break;
# Line 3342  for (;;) Line 3493  for (;;)
3493            (pcre_uint32)(end_subject - current_subject) < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3494          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3495    
3496        /* If req_byte is set, we know that that character must appear in the        /* If req_char is set, we know that that character must appear in the
3497        subject for the match to succeed. If the first character is set, req_byte        subject for the match to succeed. If the first character is set, req_char
3498        must be later in the subject; otherwise the test starts at the match        must be later in the subject; otherwise the test starts at the match
3499        point. This optimization can save a huge amount of work in patterns with        point. This optimization can save a huge amount of work in patterns with
3500        nested unlimited repeats that aren't going to match. Writing separate        nested unlimited repeats that aren't going to match. Writing separate
# Line 3355  for (;;) Line 3506  for (;;)
3506        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3507        string... so we don't do this when the string is sufficiently long. */        string... so we don't do this when the string is sufficiently long. */
3508    
3509        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3510          {          {
3511          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3512    
3513          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3514          place we found it at last time. */          place we found it at last time. */
3515    
3516          if (p > req_byte_ptr)          if (p > req_char_ptr)
3517            {            {
3518            if (req_byte_caseless)            if (req_char != req_char2)
3519              {              {
3520              while (p < end_subject)              while (p < end_subject)
3521                {                {
3522                register int pp = *p++;                register pcre_uint32 pp = RAWUCHARINCTEST(p);
3523                if (pp == req_byte || pp == req_byte2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3524                }                }
3525              }              }
3526            else            else
3527              {              {
3528              while (p < end_subject)              while (p < end_subject)
3529                {                {
3530                if (*p++ == req_byte) { p--; break; }                if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
3531                }                }
3532              }              }
3533    
# Line 3389  for (;;) Line 3540  for (;;)
3540            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3541            the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3542    
3543            req_byte_ptr = p;            req_char_ptr = p;
3544            }            }
3545          }          }
3546        }        }
# Line 3414  for (;;) Line 3565  for (;;)
3565    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3566    on only if not anchored. */    on only if not anchored. */
3567    
3568    if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;    if (rc != PCRE_ERROR_NOMATCH || anchored)
3569        {
3570        if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
3571          {
3572          offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
3573          offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
3574          if (offsetcount > 2)
3575            offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
3576          }
3577        return rc;
3578        }
3579    
3580    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
3581    and firstline is set. */    and firstline is set. */
3582    
3583    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3584    current_subject++;    current_subject++;
3585    if (utf8)  #ifdef SUPPORT_UTF
3586      if (utf)
3587      {      {
3588      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3589        current_subject++;        current_subject++);
3590      }      }
3591    #endif
3592    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3593    
3594    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does
3595    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
3596    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
3597    
3598    if (current_subject[-1] == CHAR_CR &&    if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3599        current_subject < end_subject &&        current_subject < end_subject &&
3600        *current_subject == CHAR_NL &&        RAWUCHARTEST(current_subject) == CHAR_NL &&
3601        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
3602          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
3603           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||

Legend:
Removed from v.728  
changed lines
  Added in v.1364

  ViewVC Help
Powered by ViewVC 1.1.5