/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 728 by ph10, Mon Oct 10 16:01:03 2011 UTC revision 1015 by ph10, Sun Aug 26 16:07:14 2012 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40    
   
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45    
46    
# Line 113  small value. Non-zero values in the tabl Line 112  small value. Non-zero values in the tabl
112  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
113  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
114    
115  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
116    0,                             /* End                                    */    0,                             /* End                                    */
117    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
118    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 128  static const uschar coptable[] = { Line 127  static const uschar coptable[] = {
127    1,                             /* noti                                   */    1,                             /* noti                                   */
128    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
129    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
130    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
131    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
132      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
133    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
134    3, 3, 3,                       /* upto I, minupto I, exact I             */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
135    1, 1, 1, 3,                    /* *+I, ++I, ?+I, upto+I                  */    1+IMM2_SIZE,                   /* exact I                                */
136      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
137    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
138    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
139    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
140    1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
141      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
142    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
143    3, 3, 3,                       /* NOT upto I, minupto I, exact I         */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
144    1, 1, 1, 3,                    /* NOT *+I, ++I, ?+I, upto+I              */    1+IMM2_SIZE,                   /* NOT exact I                            */
145      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
146    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
147    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
148    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
149    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
150      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
151    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
152    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
153    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 182  remember the fact that a character could Line 186  remember the fact that a character could
186  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
187  two tables that follow must also be modified. */  two tables that follow must also be modified. */
188    
189  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
190    0,                             /* End                                    */    0,                             /* End                                    */
191    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
192    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
# Line 249  static const uschar poptable[] = { Line 253  static const uschar poptable[] = {
253  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
254  and \w */  and \w */
255    
256  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
257    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
258    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
259    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 257  static const uschar toptable1[] = { Line 261  static const uschar toptable1[] = {
261    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
262  };  };
263    
264  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
265    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
266    ctype_digit, 0,    ctype_digit, 0,
267    ctype_space, 0,    ctype_space, 0,
# Line 277  typedef struct stateblock { Line 281  typedef struct stateblock {
281    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
282  } stateblock;  } stateblock;
283    
284  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
285    
286    
287  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 296  Returns:       nothing Line 300  Returns:       nothing
300  */  */
301    
302  static void  static void
303  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
304  {  {
305  int c;  int c;
306  while (length-- > 0)  while (length-- > 0)
# Line 377  for the current character, one for the f Line 381  for the current character, one for the f
381      next_new_state->count  = (y); \      next_new_state->count  = (y); \
382      next_new_state->data   = (z); \      next_new_state->data   = (z); \
383      next_new_state++; \      next_new_state++; \
384      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
385          (x), (y), (z), __LINE__)); \
386      } \      } \
387    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
388    
# Line 386  for the current character, one for the f Line 391  for the current character, one for the f
391  static int  static int
392  internal_dfa_exec(  internal_dfa_exec(
393    dfa_match_data *md,    dfa_match_data *md,
394    const uschar *this_start_code,    const pcre_uchar *this_start_code,
395    const uschar *current_subject,    const pcre_uchar *current_subject,
396    int start_offset,    int start_offset,
397    int *offsets,    int *offsets,
398    int offsetcount,    int offsetcount,
# Line 398  internal_dfa_exec( Line 403  internal_dfa_exec(
403  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
404  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
405    
406  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
407  const uschar *ptr;  const pcre_uchar *ptr;
408  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
409    
410  dfa_recursion_info new_recursive;  dfa_recursion_info new_recursive;
411    
# Line 409  int active_count, new_count, match_count Line 414  int active_count, new_count, match_count
414  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
415  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
416    
417  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
418  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
419  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
420    
421  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
422  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
423  #else  #else
424  BOOL utf8 = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426    
427    BOOL reset_could_continue = FALSE;
428    
429  rlevel++;  rlevel++;
430  offsetcount &= (-2);  offsetcount &= (-2);
431    
# Line 442  new_count = 0; Line 449  new_count = 0;
449    
450  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
451    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
452      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0);      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
453        ? IMM2_SIZE:0);
454    
455  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
456  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 470  if (*first_op == OP_REVERSE) Line 478  if (*first_op == OP_REVERSE)
478    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
479    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
480    
481  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
482    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
483    
484    if (utf8)    if (utf)
485      {      {
486      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
487        {        {
488        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
489        current_subject--;        current_subject--;
490        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
              (*current_subject & 0xc0) == 0x80)  
         current_subject--;  
491        }        }
492      }      }
493    else    else
# Line 542  else Line 548  else
548      {      {
549      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
550        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
551          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
552          2:0);          ? IMM2_SIZE:0);
553      do      do
554        {        {
555        ADD_NEW((int)(end_code - start_code + length), 0);        ADD_NEW((int)(end_code - start_code + length), 0);
# Line 556  else Line 562  else
562    
563  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
564    
565  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
566    
567  /* Loop for scanning the subject */  /* Loop for scanning the subject */
568    
# Line 567  for (;;) Line 573  for (;;)
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    unsigned int c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
577      BOOL could_continue = reset_could_continue;
578      reset_could_continue = FALSE;
579    
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
# Line 583  for (;;) Line 591  for (;;)
591    
592  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
593    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
594    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
595    printf("\"\n");    printf("\"\n");
596    
597    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 603  for (;;) Line 611  for (;;)
611    
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
615  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
616      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
617  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
618      c = *ptr;      c = *ptr;
619      }      }
620    else    else
# Line 624  for (;;) Line 632  for (;;)
632      {      {
633      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
634      BOOL caseless = FALSE;      BOOL caseless = FALSE;
635      const uschar *code;      const pcre_uchar *code;
636      int state_offset = current_state->offset;      int state_offset = current_state->offset;
637      int count, codevalue, rrc;      int count, codevalue, rrc;
638    
# Line 637  for (;;) Line 645  for (;;)
645    
646      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
647      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
648      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
649        state, arrange for it to be passed on. */
650    
651      if (state_offset < 0)      if (state_offset < 0)
652        {        {
# Line 646  for (;;) Line 655  for (;;)
655          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
656          ADD_NEW_DATA(state_offset, current_state->count,          ADD_NEW_DATA(state_offset, current_state->count,
657            current_state->data - 1);            current_state->data - 1);
658            if (could_continue) reset_could_continue = TRUE;
659          continue;          continue;
660          }          }
661        else        else
# Line 685  for (;;) Line 695  for (;;)
695      permitted.      permitted.
696    
697      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
698      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
699      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
700      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
701      */      these ones to new opcodes. */
702    
703      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
704        {        {
705        dlen = 1;        dlen = 1;
706  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
707        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
708  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
709        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
710        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
711          {          {
# Line 779  for (;;) Line 789  for (;;)
789              offsets[0] = (int)(current_subject - start_subject);              offsets[0] = (int)(current_subject - start_subject);
790              offsets[1] = (int)(ptr - start_subject);              offsets[1] = (int)(ptr - start_subject);
791              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
792                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
793              }              }
794            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
795              {              {
# Line 816  for (;;) Line 826  for (;;)
826        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
827        case OP_CBRA:        case OP_CBRA:
828        case OP_SCBRA:        case OP_SCBRA:
829        ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE),  0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
830        code += GET(code, 1);        code += GET(code, 1);
831        while (*code == OP_ALT)        while (*code == OP_ALT)
832          {          {
# Line 884  for (;;) Line 894  for (;;)
894        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
895        case OP_ANY:        case OP_ANY:
896        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
897          { ADD_NEW(state_offset + 1, 0); }          {
898            if (ptr + 1 >= md->end_subject &&
899                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
900                NLBLOCK->nltype == NLTYPE_FIXED &&
901                NLBLOCK->nllen == 2 &&
902                c == NLBLOCK->nl[0])
903              {
904              could_continue = partial_newline = TRUE;
905              }
906            else
907              {
908              ADD_NEW(state_offset + 1, 0);
909              }
910            }
911        break;        break;
912    
913        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 912  for (;;) Line 935  for (;;)
935                 (ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
936              ))              ))
937            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
938            else if (ptr + 1 >= md->end_subject &&
939                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
940                     NLBLOCK->nltype == NLTYPE_FIXED &&
941                     NLBLOCK->nllen == 2 &&
942                     c == NLBLOCK->nl[0])
943              {
944              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
945                {
946                reset_could_continue = TRUE;
947                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
948                }
949              else could_continue = partial_newline = TRUE;
950              }
951          }          }
952        break;        break;
953    
# Line 924  for (;;) Line 960  for (;;)
960          else if (clen == 0 ||          else if (clen == 0 ||
961              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
962            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
963            else if (ptr + 1 >= md->end_subject &&
964                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
965                     NLBLOCK->nltype == NLTYPE_FIXED &&
966                     NLBLOCK->nllen == 2 &&
967                     c == NLBLOCK->nl[0])
968              {
969              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
970                {
971                reset_could_continue = TRUE;
972                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
973                }
974              else could_continue = partial_newline = TRUE;
975              }
976          }          }
977        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
978          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 956  for (;;) Line 1005  for (;;)
1005    
1006          if (ptr > start_subject)          if (ptr > start_subject)
1007            {            {
1008            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1009            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1010  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1011            if (utf8) BACKCHAR(temp);            if (utf) { BACKCHAR(temp); }
1012  #endif  #endif
1013            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
1014  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1024  for (;;) Line 1073  for (;;)
1073            break;            break;
1074    
1075            case PT_GC:            case PT_GC:
1076            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1077            break;            break;
1078    
1079            case PT_PC:            case PT_PC:
# Line 1038  for (;;) Line 1087  for (;;)
1087            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1088    
1089            case PT_ALNUM:            case PT_ALNUM:
1090            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1091                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1092            break;            break;
1093    
1094            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1095            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1096                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1097            break;            break;
1098    
1099            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1100            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1101                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1102                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1103            break;            break;
1104    
1105            case PT_WORD:            case PT_WORD:
1106            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1107                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1108                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1109            break;            break;
1110    
# Line 1086  for (;;) Line 1135  for (;;)
1135        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1136        if (clen > 0)        if (clen > 0)
1137          {          {
1138          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1139                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1140                NLBLOCK->nltype == NLTYPE_FIXED &&
1141                NLBLOCK->nllen == 2 &&
1142                c == NLBLOCK->nl[0])
1143              {
1144              could_continue = partial_newline = TRUE;
1145              }
1146            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1147              (c < 256 &&              (c < 256 &&
1148                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1149                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1109  for (;;) Line 1166  for (;;)
1166        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1167        if (clen > 0)        if (clen > 0)
1168          {          {
1169          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1170                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1171                NLBLOCK->nltype == NLTYPE_FIXED &&
1172                NLBLOCK->nllen == 2 &&
1173                c == NLBLOCK->nl[0])
1174              {
1175              could_continue = partial_newline = TRUE;
1176              }
1177            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1178              (c < 256 &&              (c < 256 &&
1179                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1180                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1131  for (;;) Line 1196  for (;;)
1196        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1197        if (clen > 0)        if (clen > 0)
1198          {          {
1199          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1200                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1201                NLBLOCK->nltype == NLTYPE_FIXED &&
1202                NLBLOCK->nllen == 2 &&
1203                c == NLBLOCK->nl[0])
1204              {
1205              could_continue = partial_newline = TRUE;
1206              }
1207            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1208              (c < 256 &&              (c < 256 &&
1209                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1210                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1151  for (;;) Line 1224  for (;;)
1224        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1225        if (clen > 0)        if (clen > 0)
1226          {          {
1227          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1228                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1229                NLBLOCK->nltype == NLTYPE_FIXED &&
1230                NLBLOCK->nllen == 2 &&
1231                c == NLBLOCK->nl[0])
1232              {
1233              could_continue = partial_newline = TRUE;
1234              }
1235            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1236              (c < 256 &&              (c < 256 &&
1237                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1238                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1239            {            {
1240            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1241              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1242            else            else
1243              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1244            }            }
# Line 1168  for (;;) Line 1249  for (;;)
1249        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1250        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1251        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1252        ADD_ACTIVE(state_offset + 4, 0);        ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1253        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1254        if (clen > 0)        if (clen > 0)
1255          {          {
1256          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1257                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1258                NLBLOCK->nltype == NLTYPE_FIXED &&
1259                NLBLOCK->nllen == 2 &&
1260                c == NLBLOCK->nl[0])
1261              {
1262              could_continue = partial_newline = TRUE;
1263              }
1264            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1265              (c < 256 &&              (c < 256 &&
1266                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1267                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1183  for (;;) Line 1272  for (;;)
1272              next_active_state--;              next_active_state--;
1273              }              }
1274            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1275              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1276            else            else
1277              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1278            }            }
# Line 1218  for (;;) Line 1307  for (;;)
1307            break;            break;
1308    
1309            case PT_GC:            case PT_GC:
1310            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1311            break;            break;
1312    
1313            case PT_PC:            case PT_PC:
# Line 1232  for (;;) Line 1321  for (;;)
1321            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1322    
1323            case PT_ALNUM:            case PT_ALNUM:
1324            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1325                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1326            break;            break;
1327    
1328            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1329            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1330                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1331            break;            break;
1332    
1333            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1334            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1335                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1336                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1337            break;            break;
1338    
1339            case PT_WORD:            case PT_WORD:
1340            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1341                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1342                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1343            break;            break;
1344    
# Line 1279  for (;;) Line 1368  for (;;)
1368        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1369        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1370        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1371        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1372          {          {
1373          const uschar *nptr = ptr + clen;          int lgb, rgb;
1374            const pcre_uchar *nptr = ptr + clen;
1375          int ncount = 0;          int ncount = 0;
1376          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1377            {            {
1378            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1379            next_active_state--;            next_active_state--;
1380            }            }
1381            lgb = UCD_GRAPHBREAK(c);
1382          while (nptr < end_subject)          while (nptr < end_subject)
1383            {            {
1384            int nd;            dlen = 1;
1385            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1386            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1387            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1388            ncount++;            ncount++;
1389            nptr += ndlen;            lgb = rgb;
1390              nptr += dlen;
1391            }            }
1392          count++;          count++;
1393          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
# Line 1465  for (;;) Line 1557  for (;;)
1557            break;            break;
1558    
1559            case PT_GC:            case PT_GC:
1560            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1561            break;            break;
1562    
1563            case PT_PC:            case PT_PC:
# Line 1479  for (;;) Line 1571  for (;;)
1571            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1572    
1573            case PT_ALNUM:            case PT_ALNUM:
1574            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1575                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1576            break;            break;
1577    
1578            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1579            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1580                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1581            break;            break;
1582    
1583            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1584            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1585                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1586                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1587            break;            break;
1588    
1589            case PT_WORD:            case PT_WORD:
1590            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1591                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1592                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1593            break;            break;
1594    
# Line 1535  for (;;) Line 1627  for (;;)
1627        QS2:        QS2:
1628    
1629        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1630        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1631          {          {
1632          const uschar *nptr = ptr + clen;          int lgb, rgb;
1633            const pcre_uchar *nptr = ptr + clen;
1634          int ncount = 0;          int ncount = 0;
1635          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1636              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1545  for (;;) Line 1638  for (;;)
1638            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1639            next_active_state--;            next_active_state--;
1640            }            }
1641            lgb = UCD_GRAPHBREAK(c);
1642          while (nptr < end_subject)          while (nptr < end_subject)
1643            {            {
1644            int nd;            dlen = 1;
1645            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1646            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1647            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1648            ncount++;            ncount++;
1649            nptr += ndlen;            lgb = rgb;
1650              nptr += dlen;
1651            }            }
1652          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1653          }          }
# Line 1719  for (;;) Line 1814  for (;;)
1814        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1815        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1816        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1817          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1818        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1819        if (clen > 0)        if (clen > 0)
1820          {          {
1821          BOOL OK;          BOOL OK;
1822          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1823          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1824            {            {
1825            case PT_ANY:            case PT_ANY:
1826            OK = TRUE;            OK = TRUE;
# Line 1737  for (;;) Line 1832  for (;;)
1832            break;            break;
1833    
1834            case PT_GC:            case PT_GC:
1835            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1836            break;            break;
1837    
1838            case PT_PC:            case PT_PC:
1839            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1840            break;            break;
1841    
1842            case PT_SC:            case PT_SC:
1843            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1844            break;            break;
1845    
1846            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1847    
1848            case PT_ALNUM:            case PT_ALNUM:
1849            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1850                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1851            break;            break;
1852    
1853            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1854            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1855                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1856            break;            break;
1857    
1858            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1859            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1860                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1861                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1862            break;            break;
1863    
1864            case PT_WORD:            case PT_WORD:
1865            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1866                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1867                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1868            break;            break;
1869    
# Line 1787  for (;;) Line 1882  for (;;)
1882              next_active_state--;              next_active_state--;
1883              }              }
1884            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1885              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1886            else            else
1887              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1888            }            }
# Line 1800  for (;;) Line 1895  for (;;)
1895        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1896        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1897        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1898          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1899        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1900        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1901          {          {
1902          const uschar *nptr = ptr + clen;          int lgb, rgb;
1903            const pcre_uchar *nptr = ptr + clen;
1904          int ncount = 0;          int ncount = 0;
1905          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1906            {            {
1907            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1908            next_active_state--;            next_active_state--;
1909            }            }
1910            lgb = UCD_GRAPHBREAK(c);
1911          while (nptr < end_subject)          while (nptr < end_subject)
1912            {            {
1913            int nd;            dlen = 1;
1914            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1915            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1916            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1917            ncount++;            ncount++;
1918            nptr += ndlen;            lgb = rgb;
1919              nptr += dlen;
1920            }            }
1921            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1922                reset_could_continue = TRUE;
1923          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1924            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1925          else          else
1926            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1927          }          }
# Line 1834  for (;;) Line 1934  for (;;)
1934        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1935        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1936        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1937          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1938        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1939        if (clen > 0)        if (clen > 0)
1940          {          {
# Line 1861  for (;;) Line 1961  for (;;)
1961              next_active_state--;              next_active_state--;
1962              }              }
1963            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1964              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1965            else            else
1966              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1967            break;            break;
# Line 1878  for (;;) Line 1978  for (;;)
1978        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1979        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1980        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1981          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1982        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1983        if (clen > 0)        if (clen > 0)
1984          {          {
# Line 1907  for (;;) Line 2007  for (;;)
2007              next_active_state--;              next_active_state--;
2008              }              }
2009            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2010              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2011            else            else
2012              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
2013            }            }
# Line 1920  for (;;) Line 2020  for (;;)
2020        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2021        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2022        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2023          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2024        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2025        if (clen > 0)        if (clen > 0)
2026          {          {
# Line 1962  for (;;) Line 2062  for (;;)
2062              next_active_state--;              next_active_state--;
2063              }              }
2064            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2065              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2066            else            else
2067              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
2068            }            }
# Line 1984  for (;;) Line 2084  for (;;)
2084        case OP_CHARI:        case OP_CHARI:
2085        if (clen == 0) break;        if (clen == 0) break;
2086    
2087  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2088        if (utf8)        if (utf)
2089          {          {
2090          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2091            {            {
2092            unsigned int othercase;            unsigned int othercase;
2093            if (c < 128) othercase = fcc[c]; else            if (c < 128)
2094                othercase = fcc[c];
2095            /* If we have Unicode property support, we can use it to test the            else
2096            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2097                other case of the character. */
2098  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2099            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2100  #else  #else
2101            othercase = NOTACHAR;              othercase = NOTACHAR;
2102  #endif  #endif
2103    
2104            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2105            }            }
2106          }          }
2107        else        else
2108  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2109          /* Not UTF mode */
       /* Non-UTF-8 mode */  
2110          {          {
2111          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2112              { ADD_NEW(state_offset + 2, 0); }
2113          }          }
2114        break;        break;
2115    
# Line 2021  for (;;) Line 2121  for (;;)
2121        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2122    
2123        case OP_EXTUNI:        case OP_EXTUNI:
2124        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2125          {          {
2126          const uschar *nptr = ptr + clen;          int lgb, rgb;
2127            const pcre_uchar *nptr = ptr + clen;
2128          int ncount = 0;          int ncount = 0;
2129            lgb = UCD_GRAPHBREAK(c);
2130          while (nptr < end_subject)          while (nptr < end_subject)
2131            {            {
2132            int nclen = 1;            dlen = 1;
2133            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2134            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2135              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2136            ncount++;            ncount++;
2137            nptr += nclen;            lgb = rgb;
2138              nptr += dlen;
2139            }            }
2140            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2141                reset_could_continue = TRUE;
2142          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2143          }          }
2144        break;        break;
# Line 2058  for (;;) Line 2164  for (;;)
2164          break;          break;
2165    
2166          case 0x000d:          case 0x000d:
2167          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2168              {
2169              ADD_NEW(state_offset + 1, 0);
2170              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2171                reset_could_continue = TRUE;
2172              }
2173            else if (ptr[1] == 0x0a)
2174            {            {
2175            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2176            }            }
# Line 2167  for (;;) Line 2279  for (;;)
2279        break;        break;
2280    
2281        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2282        /* Match a negated single character casefully. This is only used for        /* Match a negated single character casefully. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2283    
2284        case OP_NOT:        case OP_NOT:
2285        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2286        break;        break;
2287    
2288        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2289        /* Match a negated single character caselessly. This is only used for        /* Match a negated single character caselessly. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2290    
2291        case OP_NOTI:        case OP_NOTI:
2292        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0)
2293          { ADD_NEW(state_offset + dlen + 1, 0); }          {
2294            unsigned int otherd;
2295    #ifdef SUPPORT_UTF
2296            if (utf && d >= 128)
2297              {
2298    #ifdef SUPPORT_UCP
2299              otherd = UCD_OTHERCASE(d);
2300    #endif  /* SUPPORT_UCP */
2301              }
2302            else
2303    #endif  /* SUPPORT_UTF */
2304            otherd = TABLE_GET(d, fcc, d);
2305            if (c != d && c != otherd)
2306              { ADD_NEW(state_offset + dlen + 1, 0); }
2307            }
2308        break;        break;
2309    
2310        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 2209  for (;;) Line 2331  for (;;)
2331          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2332          if (caseless)          if (caseless)
2333            {            {
2334  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2335            if (utf8 && d >= 128)            if (utf && d >= 128)
2336              {              {
2337  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2338              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2339  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2340              }              }
2341            else            else
2342  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2343            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2344            }            }
2345          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2346            {            {
# Line 2256  for (;;) Line 2378  for (;;)
2378          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2379          if (caseless)          if (caseless)
2380            {            {
2381  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2382            if (utf8 && d >= 128)            if (utf && d >= 128)
2383              {              {
2384  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2385              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2386  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2387              }              }
2388            else            else
2389  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2390            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2391            }            }
2392          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2393            {            {
# Line 2301  for (;;) Line 2423  for (;;)
2423          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2424          if (caseless)          if (caseless)
2425            {            {
2426  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2427            if (utf8 && d >= 128)            if (utf && d >= 128)
2428              {              {
2429  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2430              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2431  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2432              }              }
2433            else            else
2434  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2435            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2436            }            }
2437          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2438            {            {
# Line 2338  for (;;) Line 2460  for (;;)
2460          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2461          if (caseless)          if (caseless)
2462            {            {
2463  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2464            if (utf8 && d >= 128)            if (utf && d >= 128)
2465              {              {
2466  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2467              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2468  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2469              }              }
2470            else            else
2471  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2472            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2473            }            }
2474          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2475            {            {
2476            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2477              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2478            else            else
2479              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2480            }            }
# Line 2375  for (;;) Line 2497  for (;;)
2497        case OP_NOTUPTO:        case OP_NOTUPTO:
2498        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2499        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2500        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2501        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2502        if (clen > 0)        if (clen > 0)
2503          {          {
2504          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2505          if (caseless)          if (caseless)
2506            {            {
2507  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2508            if (utf8 && d >= 128)            if (utf && d >= 128)
2509              {              {
2510  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2511              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2512  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2513              }              }
2514            else            else
2515  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2516            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2517            }            }
2518          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2519            {            {
# Line 2401  for (;;) Line 2523  for (;;)
2523              next_active_state--;              next_active_state--;
2524              }              }
2525            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2526              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2527            else            else
2528              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2529            }            }
# Line 2418  for (;;) Line 2540  for (;;)
2540          {          {
2541          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2542          int next_state_offset;          int next_state_offset;
2543          const uschar *ecode;          const pcre_uchar *ecode;
2544    
2545          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2546          can set isinclass from it. */          can set isinclass from it. */
2547    
2548          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2549            {            {
2550            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2551            if (clen > 0)            if (clen > 0)
2552              {              {
2553              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2554                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2555              }              }
2556            }            }
2557    
# Line 2440  for (;;) Line 2562  for (;;)
2562          else          else
2563           {           {
2564           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2565           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2566           }           }
2567    
2568          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
# Line 2474  for (;;) Line 2596  for (;;)
2596            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2597            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2598            if (count >= GET2(ecode, 1))            if (count >= GET2(ecode, 1))
2599              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2600            if (isinclass)            if (isinclass)
2601              {              {
2602              int max = GET2(ecode, 3);              int max = GET2(ecode, 1 + IMM2_SIZE);
2603              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2604                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2605              else              else
2606                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
2607              }              }
# Line 2510  for (;;) Line 2632  for (;;)
2632          int rc;          int rc;
2633          int local_offsets[2];          int local_offsets[2];
2634          int local_workspace[1000];          int local_workspace[1000];
2635          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2636    
2637          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2638    
# Line 2547  for (;;) Line 2669  for (;;)
2669          if (code[LINK_SIZE+1] == OP_CALLOUT)          if (code[LINK_SIZE+1] == OP_CALLOUT)
2670            {            {
2671            rrc = 0;            rrc = 0;
2672            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2673              {              {
2674              pcre_callout_block cb;              PUBL(callout_block) cb;
2675              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2676              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2677              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2678    #ifdef COMPILE_PCRE8
2679              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2680    #else
2681                cb.subject          = (PCRE_SPTR16)start_subject;
2682    #endif
2683              cb.subject_length   = (int)(end_subject - start_subject);              cb.subject_length   = (int)(end_subject - start_subject);
2684              cb.start_match      = (int)(current_subject - start_subject);              cb.start_match      = (int)(current_subject - start_subject);
2685              cb.current_position = (int)(ptr - start_subject);              cb.current_position = (int)(ptr - start_subject);
# Line 2563  for (;;) Line 2689  for (;;)
2689              cb.capture_last     = -1;              cb.capture_last     = -1;
2690              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2691              cb.mark             = NULL;   /* No (*MARK) support */              cb.mark             = NULL;   /* No (*MARK) support */
2692              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2693              }              }
2694            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2695            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2696            }            }
2697    
2698          condcode = code[LINK_SIZE+1];          condcode = code[LINK_SIZE+1];
# Line 2587  for (;;) Line 2713  for (;;)
2713    
2714          else if (condcode == OP_RREF || condcode == OP_NRREF)          else if (condcode == OP_RREF || condcode == OP_NRREF)
2715            {            {
2716            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2717            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2718            if (md->recursive != NULL)            if (md->recursive != NULL)
2719              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2720            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2721            }            }
2722    
# Line 2599  for (;;) Line 2725  for (;;)
2725          else          else
2726            {            {
2727            int rc;            int rc;
2728            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2729            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2730    
2731            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2732    
# Line 2631  for (;;) Line 2757  for (;;)
2757          dfa_recursion_info *ri;          dfa_recursion_info *ri;
2758          int local_offsets[1000];          int local_offsets[1000];
2759          int local_workspace[1000];          int local_workspace[1000];
2760          const uschar *callpat = start_code + GET(code, 1);          const pcre_uchar *callpat = start_code + GET(code, 1);
2761          int recno = (callpat == md->start_code)? 0 :          int recno = (callpat == md->start_code)? 0 :
2762            GET2(callpat, 1 + LINK_SIZE);            GET2(callpat, 1 + LINK_SIZE);
2763          int rc;          int rc;
# Line 2682  for (;;) Line 2808  for (;;)
2808            {            {
2809            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2810              {              {
             const uschar *p = start_subject + local_offsets[rc];  
             const uschar *pp = start_subject + local_offsets[rc+1];  
2811              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2812              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2813                if (utf)
2814                  {
2815                  const pcre_uchar *p = start_subject + local_offsets[rc];
2816                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2817                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2818                  }
2819    #endif
2820              if (charcount > 0)              if (charcount > 0)
2821                {                {
2822                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
# Line 2708  for (;;) Line 2839  for (;;)
2839        case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
2840          {          {
2841          int charcount, matched_count;          int charcount, matched_count;
2842          const uschar *local_ptr = ptr;          const pcre_uchar *local_ptr = ptr;
2843          BOOL allow_zero;          BOOL allow_zero;
2844    
2845          if (codevalue == OP_BRAPOSZERO)          if (codevalue == OP_BRAPOSZERO)
# Line 2758  for (;;) Line 2889  for (;;)
2889    
2890          if (matched_count > 0 || allow_zero)          if (matched_count > 0 || allow_zero)
2891            {            {
2892            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2893            int next_state_offset;            int next_state_offset;
2894    
2895            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
# Line 2779  for (;;) Line 2910  for (;;)
2910              }              }
2911            else            else
2912              {              {
2913              const uschar *p = ptr;              const pcre_uchar *p = ptr;
2914              const uschar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2915              charcount = pp - p;              charcount = (int)(pp - p);
2916              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2917                if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2918    #endif
2919              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2920              }              }
2921            }            }
# Line 2791  for (;;) Line 2924  for (;;)
2924    
2925        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2926        case OP_ONCE:        case OP_ONCE:
2927        case OP_ONCE_NC:        case OP_ONCE_NC:
2928          {          {
2929          int local_offsets[2];          int local_offsets[2];
2930          int local_workspace[1000];          int local_workspace[1000];
# Line 2809  for (;;) Line 2942  for (;;)
2942    
2943          if (rc >= 0)          if (rc >= 0)
2944            {            {
2945            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2946            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2947            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2948    
# Line 2862  for (;;) Line 2995  for (;;)
2995              }              }
2996            else            else
2997              {              {
2998              const uschar *p = start_subject + local_offsets[0];  #ifdef SUPPORT_UTF
2999              const uschar *pp = start_subject + local_offsets[1];              if (utf)
3000              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;                {
3001                  const pcre_uchar *p = start_subject + local_offsets[0];
3002                  const pcre_uchar *pp = start_subject + local_offsets[1];
3003                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
3004                  }
3005    #endif
3006              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
3007              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
3008                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
# Line 2880  for (;;) Line 3018  for (;;)
3018    
3019        case OP_CALLOUT:        case OP_CALLOUT:
3020        rrc = 0;        rrc = 0;
3021        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
3022          {          {
3023          pcre_callout_block cb;          PUBL(callout_block) cb;
3024          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
3025          cb.callout_number   = code[1];          cb.callout_number   = code[1];
3026          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
3027    #ifdef COMPILE_PCRE8
3028          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
3029    #else
3030            cb.subject          = (PCRE_SPTR16)start_subject;
3031    #endif
3032          cb.subject_length   = (int)(end_subject - start_subject);          cb.subject_length   = (int)(end_subject - start_subject);
3033          cb.start_match      = (int)(current_subject - start_subject);          cb.start_match      = (int)(current_subject - start_subject);
3034          cb.current_position = (int)(ptr - start_subject);          cb.current_position = (int)(ptr - start_subject);
# Line 2896  for (;;) Line 3038  for (;;)
3038          cb.capture_last     = -1;          cb.capture_last     = -1;
3039          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
3040          cb.mark             = NULL;   /* No (*MARK) support */          cb.mark             = NULL;   /* No (*MARK) support */
3041          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
3042          }          }
3043        if (rrc == 0)        if (rrc == 0)
3044          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3045        break;        break;
3046    
3047    
# Line 2928  for (;;) Line 3070  for (;;)
3070    if (new_count <= 0)    if (new_count <= 0)
3071      {      {
3072      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3073          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3074          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3075          (                                            /* either... */          (                                            /* either... */
3076          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2936  for (;;) Line 3078  for (;;)
3078          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3079           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3080          ) &&                                         /* And... */          ) &&                                         /* And... */
3081          ptr >= end_subject &&                  /* Reached end of subject */          (
3082          ptr > md->start_used_ptr)              /* Inspected non-empty string */          partial_newline ||                           /* Either partial NL */
3083              (                                          /* or ... */
3084              ptr >= end_subject &&                /* End of subject and */
3085              ptr > md->start_used_ptr)            /* Inspected non-empty string */
3086              )
3087            )
3088        {        {
3089        if (offsetcount >= 2)        if (offsetcount >= 2)
3090          {          {
# Line 2996  Returns:          > 0 => number of match Line 3143  Returns:          > 0 => number of match
3143                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3144  */  */
3145    
3146    #ifdef COMPILE_PCRE8
3147  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3148  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3149    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3150    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3151    #else
3152    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3153    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3154      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3155      int offsetcount, int *workspace, int wscount)
3156    #endif
3157  {  {
3158  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3159  dfa_match_data match_block;  dfa_match_data match_block;
3160  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3161  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3162  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;
   
 pcre_study_data internal_study;  
3163  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
 real_pcre internal_re;  
3164    
3165  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3166  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3167  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3168  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3169  int first_byte = -1;  pcre_uchar first_char = 0;
3170  int req_byte = -1;  pcre_uchar first_char2 = 0;
3171  int req_byte2 = -1;  pcre_uchar req_char = 0;
3172    pcre_uchar req_char2 = 0;
3173  int newline;  int newline;
3174    
3175  /* Plausibility checks */  /* Plausibility checks */
# Line 3029  if (offsetcount < 0) return PCRE_ERROR_B Line 3181  if (offsetcount < 0) return PCRE_ERROR_B
3181  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3182  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3183    
3184  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3185  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3186  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3187  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3188    
3189    if (re->magic_number != MAGIC_NUMBER)
3190      return re->magic_number == REVERSED_MAGIC_NUMBER?
3191        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3192    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3193    
3194    /* If restarting after a partial match, do some sanity checks on the contents
3195    of the workspace. */
3196    
3197    if ((options & PCRE_DFA_RESTART) != 0)
3198      {
3199      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3200        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3201          return PCRE_ERROR_DFA_BADRESTART;
3202      }
3203    
3204    /* Set up study, callout, and table data */
3205    
3206  md->tables = re->tables;  md->tables = re->tables;
3207  md->callout_data = NULL;  md->callout_data = NULL;
# Line 3051  if (extra_data != NULL) Line 3220  if (extra_data != NULL)
3220      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3221    }    }
3222    
 /* Check that the first field in the block is the magic number. If it is not,  
 test for a regex that was compiled on a host of opposite endianness. If this is  
 the case, flipped values are put in internal_re and internal_study if there was  
 study data too. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   {  
   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);  
   if (re == NULL) return PCRE_ERROR_BADMAGIC;  
   if (study != NULL) study = &internal_study;  
   }  
   
3223  /* Set some local values */  /* Set some local values */
3224    
3225  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3226  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3227  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3228    
3229  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3230  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3231    utf = (re->options & PCRE_UTF8) != 0;
3232  #else  #else
3233  utf8 = FALSE;  utf = FALSE;
3234  #endif  #endif
3235    
3236  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 3080  anchored = (options & (PCRE_ANCHORED|PCR Line 3238  anchored = (options & (PCRE_ANCHORED|PCR
3238    
3239  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3240    
3241  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3242      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3243  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3244  md->end_subject = end_subject;  md->end_subject = end_subject;
3245  md->start_offset = start_offset;  md->start_offset = start_offset;
3246  md->moptions = options;  md->moptions = options;
# Line 3143  else Line 3301  else
3301  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3302  back the character offset. */  back the character offset. */
3303    
3304  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3305  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3306    {    {
3307    int erroroffset;    int erroroffset;
3308    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3309    if (errorcode != 0)    if (errorcode != 0)
3310      {      {
3311      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 3159  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3317  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3317        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3318      }      }
3319    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3320          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3321      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
3322    }    }
3323  #endif  #endif
# Line 3168  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3326  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3326  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3327  in other programs later. */  in other programs later. */
3328    
3329  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3330    
3331  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3332  used in a loop when finding where to start. */  where to start. */
3333    
 lcc = md->tables + lcc_offset;  
3334  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3335  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3336    
# Line 3187  if (!anchored) Line 3344  if (!anchored)
3344    {    {
3345    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3346      {      {
3347      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3348      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
3349        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3350          {
3351          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3352    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3353          if (utf && first_char > 127)
3354            first_char2 = UCD_OTHERCASE(first_char);
3355    #endif
3356          }
3357      }      }
3358    else    else
3359      {      {
# Line 3204  character" set. */ Line 3368  character" set. */
3368    
3369  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3370    {    {
3371    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3372    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3373    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3374        {
3375        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3376    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3377        if (utf && req_char > 127)
3378          req_char2 = UCD_OTHERCASE(req_char);
3379    #endif
3380        }
3381    }    }
3382    
3383  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 3219  for (;;) Line 3390  for (;;)
3390    
3391    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3392      {      {
3393      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3394    
3395      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3396      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 3228  for (;;) Line 3399  for (;;)
3399    
3400      if (firstline)      if (firstline)
3401        {        {
3402        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3403  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3404        if (utf8)        if (utf)
3405          {          {
3406          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3407            {            {
3408            t++;            t++;
3409            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3410            }            }
3411          }          }
3412        else        else
# Line 3252  for (;;) Line 3423  for (;;)
3423    
3424      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3425        {        {
3426        /* Advance to a known first byte. */        /* Advance to a known first char. */
3427    
3428        if (first_byte >= 0)        if (has_first_char)
3429          {          {
3430          if (first_byte_caseless)          if (first_char != first_char2)
3431            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3432                   lcc[*current_subject] != first_byte)                *current_subject != first_char && *current_subject != first_char2)
3433              current_subject++;              current_subject++;
3434          else          else
3435            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3436                   *current_subject != first_byte)                   *current_subject != first_char)
3437              current_subject++;              current_subject++;
3438          }          }
3439    
# Line 3272  for (;;) Line 3443  for (;;)
3443          {          {
3444          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3445            {            {
3446  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3447            if (utf8)            if (utf)
3448              {              {
3449              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3450                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3451                {                {
3452                current_subject++;                current_subject++;
3453                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3454                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
                 current_subject++;  
3455                }                }
3456              }              }
3457            else            else
# Line 3308  for (;;) Line 3478  for (;;)
3478          while (current_subject < end_subject)          while (current_subject < end_subject)
3479            {            {
3480            register unsigned int c = *current_subject;            register unsigned int c = *current_subject;
3481    #ifndef COMPILE_PCRE8
3482              if (c > 255) c = 255;
3483    #endif
3484            if ((start_bits[c/8] & (1 << (c&7))) == 0)            if ((start_bits[c/8] & (1 << (c&7))) == 0)
3485              {              {
3486              current_subject++;              current_subject++;
3487  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3488              if (utf8)              /* In non 8-bit mode, the iteration will stop for
3489                while(current_subject < end_subject &&              characters > 255 at the beginning or not stop at all. */
3490                      (*current_subject & 0xc0) == 0x80) current_subject++;              if (utf)
3491                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3492                    current_subject++);
3493  #endif  #endif
3494              }              }
3495            else break;            else break;
# Line 3342  for (;;) Line 3517  for (;;)
3517            (pcre_uint32)(end_subject - current_subject) < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3518          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3519    
3520        /* If req_byte is set, we know that that character must appear in the        /* If req_char is set, we know that that character must appear in the
3521        subject for the match to succeed. If the first character is set, req_byte        subject for the match to succeed. If the first character is set, req_char
3522        must be later in the subject; otherwise the test starts at the match        must be later in the subject; otherwise the test starts at the match
3523        point. This optimization can save a huge amount of work in patterns with        point. This optimization can save a huge amount of work in patterns with
3524        nested unlimited repeats that aren't going to match. Writing separate        nested unlimited repeats that aren't going to match. Writing separate
# Line 3355  for (;;) Line 3530  for (;;)
3530        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3531        string... so we don't do this when the string is sufficiently long. */        string... so we don't do this when the string is sufficiently long. */
3532    
3533        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3534          {          {
3535          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3536    
3537          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3538          place we found it at last time. */          place we found it at last time. */
3539    
3540          if (p > req_byte_ptr)          if (p > req_char_ptr)
3541            {            {
3542            if (req_byte_caseless)            if (req_char != req_char2)
3543              {              {
3544              while (p < end_subject)              while (p < end_subject)
3545                {                {
3546                register int pp = *p++;                register int pp = *p++;
3547                if (pp == req_byte || pp == req_byte2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3548                }                }
3549              }              }
3550            else            else
3551              {              {
3552              while (p < end_subject)              while (p < end_subject)
3553                {                {
3554                if (*p++ == req_byte) { p--; break; }                if (*p++ == req_char) { p--; break; }
3555                }                }
3556              }              }
3557    
# Line 3389  for (;;) Line 3564  for (;;)
3564            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3565            the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3566    
3567            req_byte_ptr = p;            req_char_ptr = p;
3568            }            }
3569          }          }
3570        }        }
# Line 3421  for (;;) Line 3596  for (;;)
3596    
3597    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3598    current_subject++;    current_subject++;
3599    if (utf8)  #ifdef SUPPORT_UTF
3600      if (utf)
3601      {      {
3602      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3603        current_subject++;        current_subject++);
3604      }      }
3605    #endif
3606    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3607    
3608    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does

Legend:
Removed from v.728  
changed lines
  Added in v.1015

  ViewVC Help
Powered by ViewVC 1.1.5