/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 645 by ph10, Sun Jul 31 17:02:18 2011 UTC revision 982 by ph10, Wed Jun 20 15:15:27 2012 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40    
   
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45    
46    
# Line 113  small value. Non-zero values in the tabl Line 112  small value. Non-zero values in the tabl
112  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
113  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
114    
115  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
116    0,                             /* End                                    */    0,                             /* End                                    */
117    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
118    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 128  static const uschar coptable[] = { Line 127  static const uschar coptable[] = {
127    1,                             /* noti                                   */    1,                             /* noti                                   */
128    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
129    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
130    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
131    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
132      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
133    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
134    3, 3, 3,                       /* upto I, minupto I, exact I             */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
135    1, 1, 1, 3,                    /* *+I, ++I, ?+I, upto+I                  */    1+IMM2_SIZE,                   /* exact I                                */
136      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
137    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
138    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
139    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
140    1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
141      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
142    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
143    3, 3, 3,                       /* NOT upto I, minupto I, exact I         */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
144    1, 1, 1, 3,                    /* NOT *+I, ++I, ?+I, upto+I              */    1+IMM2_SIZE,                   /* NOT exact I                            */
145      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
146    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
147    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
148    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
149    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
150      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
151    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
152    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
153    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 164  static const uschar coptable[] = { Line 168  static const uschar coptable[] = {
168    0,                             /* Assert not                             */    0,                             /* Assert not                             */
169    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
170    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
171    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */    0, 0,                          /* ONCE, ONCE_NC                          */
172      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
173    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
174    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
175    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
# Line 181  remember the fact that a character could Line 186  remember the fact that a character could
186  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
187  two tables that follow must also be modified. */  two tables that follow must also be modified. */
188    
189  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
190    0,                             /* End                                    */    0,                             /* End                                    */
191    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
192    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
# Line 232  static const uschar poptable[] = { Line 237  static const uschar poptable[] = {
237    0,                             /* Assert not                             */    0,                             /* Assert not                             */
238    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
239    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
240    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */    0, 0,                          /* ONCE, ONCE_NC                          */
241      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
242    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
243    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
244    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
# Line 247  static const uschar poptable[] = { Line 253  static const uschar poptable[] = {
253  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
254  and \w */  and \w */
255    
256  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
257    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
258    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
259    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 255  static const uschar toptable1[] = { Line 261  static const uschar toptable1[] = {
261    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
262  };  };
263    
264  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
265    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
266    ctype_digit, 0,    ctype_digit, 0,
267    ctype_space, 0,    ctype_space, 0,
# Line 275  typedef struct stateblock { Line 281  typedef struct stateblock {
281    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
282  } stateblock;  } stateblock;
283    
284  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
285    
286    
287  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 294  Returns:       nothing Line 300  Returns:       nothing
300  */  */
301    
302  static void  static void
303  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
304  {  {
305  int c;  int c;
306  while (length-- > 0)  while (length-- > 0)
# Line 375  for the current character, one for the f Line 381  for the current character, one for the f
381      next_new_state->count  = (y); \      next_new_state->count  = (y); \
382      next_new_state->data   = (z); \      next_new_state->data   = (z); \
383      next_new_state++; \      next_new_state++; \
384      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
385          (x), (y), (z), __LINE__)); \
386      } \      } \
387    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
388    
# Line 384  for the current character, one for the f Line 391  for the current character, one for the f
391  static int  static int
392  internal_dfa_exec(  internal_dfa_exec(
393    dfa_match_data *md,    dfa_match_data *md,
394    const uschar *this_start_code,    const pcre_uchar *this_start_code,
395    const uschar *current_subject,    const pcre_uchar *current_subject,
396    int start_offset,    int start_offset,
397    int *offsets,    int *offsets,
398    int offsetcount,    int offsetcount,
# Line 396  internal_dfa_exec( Line 403  internal_dfa_exec(
403  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
404  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
405    
406  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
407  const uschar *ptr;  const pcre_uchar *ptr;
408  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
409    
410  dfa_recursion_info new_recursive;  dfa_recursion_info new_recursive;
411    
# Line 407  int active_count, new_count, match_count Line 414  int active_count, new_count, match_count
414  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
415  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
416    
417  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
418  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
419  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
420    
421  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
422  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
423  #else  #else
424  BOOL utf8 = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426    
427    BOOL reset_could_continue = FALSE;
428    
429  rlevel++;  rlevel++;
430  offsetcount &= (-2);  offsetcount &= (-2);
431    
# Line 440  new_count = 0; Line 449  new_count = 0;
449    
450  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
451    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
452      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0);      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
453        ? IMM2_SIZE:0);
454    
455  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
456  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 468  if (*first_op == OP_REVERSE) Line 478  if (*first_op == OP_REVERSE)
478    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
479    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
480    
481  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
482    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
483    
484    if (utf8)    if (utf)
485      {      {
486      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
487        {        {
488        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
489        current_subject--;        current_subject--;
490        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
              (*current_subject & 0xc0) == 0x80)  
         current_subject--;  
491        }        }
492      }      }
493    else    else
# Line 540  else Line 548  else
548      {      {
549      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
550        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
551          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
552          2:0);          ? IMM2_SIZE:0);
553      do      do
554        {        {
555        ADD_NEW((int)(end_code - start_code + length), 0);        ADD_NEW((int)(end_code - start_code + length), 0);
# Line 554  else Line 562  else
562    
563  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
564    
565  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
566    
567  /* Loop for scanning the subject */  /* Loop for scanning the subject */
568    
# Line 565  for (;;) Line 573  for (;;)
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    unsigned int c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
577      BOOL could_continue = reset_could_continue;
578      reset_could_continue = FALSE;
579    
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
# Line 581  for (;;) Line 591  for (;;)
591    
592  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
593    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
594    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
595    printf("\"\n");    printf("\"\n");
596    
597    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 601  for (;;) Line 611  for (;;)
611    
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
615  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
616      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
617  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
618      c = *ptr;      c = *ptr;
619      }      }
620    else    else
# Line 621  for (;;) Line 631  for (;;)
631    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
632      {      {
633      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
634      BOOL caseless = FALSE;      BOOL caseless = FALSE;
635      const uschar *code;      const pcre_uchar *code;
636      int state_offset = current_state->offset;      int state_offset = current_state->offset;
637      int count, codevalue, rrc;      int count, codevalue, rrc;
638    
# Line 635  for (;;) Line 645  for (;;)
645    
646      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
647      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
648      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
649        state, arrange for it to be passed on. */
650    
651      if (state_offset < 0)      if (state_offset < 0)
652        {        {
# Line 644  for (;;) Line 655  for (;;)
655          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
656          ADD_NEW_DATA(state_offset, current_state->count,          ADD_NEW_DATA(state_offset, current_state->count,
657            current_state->data - 1);            current_state->data - 1);
658            if (could_continue) reset_could_continue = TRUE;
659          continue;          continue;
660          }          }
661        else        else
# Line 683  for (;;) Line 695  for (;;)
695      permitted.      permitted.
696    
697      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
698      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
699      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
700      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
701      */      these ones to new opcodes. */
702    
703      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
704        {        {
705        dlen = 1;        dlen = 1;
706  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
707        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
708  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
709        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
710        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
711          {          {
# Line 738  for (;;) Line 750  for (;;)
750    
751  /* ========================================================================== */  /* ========================================================================== */
752        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
753        on with the next opcode. For repeating opcodes, also add the repeat        on with the next opcode. For repeating opcodes, also add the repeat
754        state. Note that KETRPOS will always be encountered at the end of the        state. Note that KETRPOS will always be encountered at the end of the
755        subpattern, because the possessive subpattern repeats are always handled        subpattern, because the possessive subpattern repeats are always handled
756        using recursive calls. Thus, it never adds any new states.        using recursive calls. Thus, it never adds any new states.
757    
758        At the end of the (sub)pattern, unless we have an empty string and        At the end of the (sub)pattern, unless we have an empty string and
759        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
760        start of the subject, save the match data, shifting up all previous        start of the subject, save the match data, shifting up all previous
# Line 751  for (;;) Line 763  for (;;)
763        case OP_KET:        case OP_KET:
764        case OP_KETRMIN:        case OP_KETRMIN:
765        case OP_KETRMAX:        case OP_KETRMAX:
766        case OP_KETRPOS:        case OP_KETRPOS:
767        if (code != end_code)        if (code != end_code)
768          {          {
769          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
# Line 768  for (;;) Line 780  for (;;)
780                  current_subject > start_subject + md->start_offset)))                  current_subject > start_subject + md->start_offset)))
781            {            {
782            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
783              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
784                match_count = 0;                match_count = 0;
785            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
786            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
# Line 777  for (;;) Line 789  for (;;)
789              offsets[0] = (int)(current_subject - start_subject);              offsets[0] = (int)(current_subject - start_subject);
790              offsets[1] = (int)(ptr - start_subject);              offsets[1] = (int)(ptr - start_subject);
791              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
792                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
793              }              }
794            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
795              {              {
# Line 814  for (;;) Line 826  for (;;)
826        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
827        case OP_CBRA:        case OP_CBRA:
828        case OP_SCBRA:        case OP_SCBRA:
829        ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE),  0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
830        code += GET(code, 1);        code += GET(code, 1);
831        while (*code == OP_ALT)        while (*code == OP_ALT)
832          {          {
# Line 882  for (;;) Line 894  for (;;)
894        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
895        case OP_ANY:        case OP_ANY:
896        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
897          { ADD_NEW(state_offset + 1, 0); }          {
898            if (ptr + 1 >= md->end_subject &&
899                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
900                NLBLOCK->nltype == NLTYPE_FIXED &&
901                NLBLOCK->nllen == 2 &&
902                c == NLBLOCK->nl[0])
903              {
904              could_continue = partial_newline = TRUE;
905              }
906            else
907              {
908              ADD_NEW(state_offset + 1, 0);
909              }
910            }
911        break;        break;
912    
913        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 910  for (;;) Line 935  for (;;)
935                 (ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
936              ))              ))
937            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
938            else if (ptr + 1 >= md->end_subject &&
939                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
940                     NLBLOCK->nltype == NLTYPE_FIXED &&
941                     NLBLOCK->nllen == 2 &&
942                     c == NLBLOCK->nl[0])
943              {
944              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
945                {
946                reset_could_continue = TRUE;
947                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
948                }
949              else could_continue = partial_newline = TRUE;
950              }
951          }          }
952        break;        break;
953    
# Line 922  for (;;) Line 960  for (;;)
960          else if (clen == 0 ||          else if (clen == 0 ||
961              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
962            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
963            else if (ptr + 1 >= md->end_subject &&
964                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
965                     NLBLOCK->nltype == NLTYPE_FIXED &&
966                     NLBLOCK->nllen == 2 &&
967                     c == NLBLOCK->nl[0])
968              {
969              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
970                {
971                reset_could_continue = TRUE;
972                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
973                }
974              else could_continue = partial_newline = TRUE;
975              }
976          }          }
977        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
978          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 954  for (;;) Line 1005  for (;;)
1005    
1006          if (ptr > start_subject)          if (ptr > start_subject)
1007            {            {
1008            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1009            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1010  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1011            if (utf8) BACKCHAR(temp);            if (utf) { BACKCHAR(temp); }
1012  #endif  #endif
1013            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
1014  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1022  for (;;) Line 1073  for (;;)
1073            break;            break;
1074    
1075            case PT_GC:            case PT_GC:
1076            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1077            break;            break;
1078    
1079            case PT_PC:            case PT_PC:
# Line 1036  for (;;) Line 1087  for (;;)
1087            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1088    
1089            case PT_ALNUM:            case PT_ALNUM:
1090            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1091                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1092            break;            break;
1093    
1094            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1095            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1096                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1097            break;            break;
1098    
1099            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1100            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1101                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1102                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1103            break;            break;
1104    
1105            case PT_WORD:            case PT_WORD:
1106            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1107                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1108                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1109            break;            break;
1110    
# Line 1084  for (;;) Line 1135  for (;;)
1135        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1136        if (clen > 0)        if (clen > 0)
1137          {          {
1138          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1139                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1140                NLBLOCK->nltype == NLTYPE_FIXED &&
1141                NLBLOCK->nllen == 2 &&
1142                c == NLBLOCK->nl[0])
1143              {
1144              could_continue = partial_newline = TRUE;
1145              }
1146            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1147              (c < 256 &&              (c < 256 &&
1148                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1149                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1107  for (;;) Line 1166  for (;;)
1166        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1167        if (clen > 0)        if (clen > 0)
1168          {          {
1169          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1170                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1171                NLBLOCK->nltype == NLTYPE_FIXED &&
1172                NLBLOCK->nllen == 2 &&
1173                c == NLBLOCK->nl[0])
1174              {
1175              could_continue = partial_newline = TRUE;
1176              }
1177            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1178              (c < 256 &&              (c < 256 &&
1179                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1180                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1129  for (;;) Line 1196  for (;;)
1196        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1197        if (clen > 0)        if (clen > 0)
1198          {          {
1199          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1200                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1201                NLBLOCK->nltype == NLTYPE_FIXED &&
1202                NLBLOCK->nllen == 2 &&
1203                c == NLBLOCK->nl[0])
1204              {
1205              could_continue = partial_newline = TRUE;
1206              }
1207            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1208              (c < 256 &&              (c < 256 &&
1209                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1210                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1149  for (;;) Line 1224  for (;;)
1224        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1225        if (clen > 0)        if (clen > 0)
1226          {          {
1227          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1228                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1229                NLBLOCK->nltype == NLTYPE_FIXED &&
1230                NLBLOCK->nllen == 2 &&
1231                c == NLBLOCK->nl[0])
1232              {
1233              could_continue = partial_newline = TRUE;
1234              }
1235            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1236              (c < 256 &&              (c < 256 &&
1237                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1238                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1239            {            {
1240            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1241              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1242            else            else
1243              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1244            }            }
# Line 1166  for (;;) Line 1249  for (;;)
1249        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1250        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1251        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1252        ADD_ACTIVE(state_offset + 4, 0);        ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1253        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1254        if (clen > 0)        if (clen > 0)
1255          {          {
1256          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1257                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1258                NLBLOCK->nltype == NLTYPE_FIXED &&
1259                NLBLOCK->nllen == 2 &&
1260                c == NLBLOCK->nl[0])
1261              {
1262              could_continue = partial_newline = TRUE;
1263              }
1264            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1265              (c < 256 &&              (c < 256 &&
1266                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1267                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1181  for (;;) Line 1272  for (;;)
1272              next_active_state--;              next_active_state--;
1273              }              }
1274            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1275              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1276            else            else
1277              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1278            }            }
# Line 1216  for (;;) Line 1307  for (;;)
1307            break;            break;
1308    
1309            case PT_GC:            case PT_GC:
1310            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1311            break;            break;
1312    
1313            case PT_PC:            case PT_PC:
# Line 1230  for (;;) Line 1321  for (;;)
1321            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1322    
1323            case PT_ALNUM:            case PT_ALNUM:
1324            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1325                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1326            break;            break;
1327    
1328            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1329            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1330                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1331            break;            break;
1332    
1333            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1334            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1335                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1336                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1337            break;            break;
1338    
1339            case PT_WORD:            case PT_WORD:
1340            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1341                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1342                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1343            break;            break;
1344    
# Line 1279  for (;;) Line 1370  for (;;)
1370        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1371        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1372          {          {
1373          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1374          int ncount = 0;          int ncount = 0;
1375          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1376            {            {
# Line 1463  for (;;) Line 1554  for (;;)
1554            break;            break;
1555    
1556            case PT_GC:            case PT_GC:
1557            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1558            break;            break;
1559    
1560            case PT_PC:            case PT_PC:
# Line 1477  for (;;) Line 1568  for (;;)
1568            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1569    
1570            case PT_ALNUM:            case PT_ALNUM:
1571            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1572                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1573            break;            break;
1574    
1575            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1576            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1577                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1578            break;            break;
1579    
1580            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1581            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1582                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1583                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1584            break;            break;
1585    
1586            case PT_WORD:            case PT_WORD:
1587            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1588                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1589                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1590            break;            break;
1591    
# Line 1535  for (;;) Line 1626  for (;;)
1626        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1627        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1628          {          {
1629          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1630          int ncount = 0;          int ncount = 0;
1631          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1632              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1717  for (;;) Line 1808  for (;;)
1808        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1809        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1810        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1811          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1812        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1813        if (clen > 0)        if (clen > 0)
1814          {          {
1815          BOOL OK;          BOOL OK;
1816          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1817          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1818            {            {
1819            case PT_ANY:            case PT_ANY:
1820            OK = TRUE;            OK = TRUE;
# Line 1735  for (;;) Line 1826  for (;;)
1826            break;            break;
1827    
1828            case PT_GC:            case PT_GC:
1829            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1830            break;            break;
1831    
1832            case PT_PC:            case PT_PC:
1833            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1834            break;            break;
1835    
1836            case PT_SC:            case PT_SC:
1837            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1838            break;            break;
1839    
1840            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1841    
1842            case PT_ALNUM:            case PT_ALNUM:
1843            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1844                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1845            break;            break;
1846    
1847            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1848            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1849                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1850            break;            break;
1851    
1852            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1853            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1854                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1855                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1856            break;            break;
1857    
1858            case PT_WORD:            case PT_WORD:
1859            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1860                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1861                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1862            break;            break;
1863    
# Line 1785  for (;;) Line 1876  for (;;)
1876              next_active_state--;              next_active_state--;
1877              }              }
1878            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1879              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1880            else            else
1881              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1882            }            }
# Line 1798  for (;;) Line 1889  for (;;)
1889        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1890        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1891        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1892          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1893        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1894        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1895          {          {
1896          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1897          int ncount = 0;          int ncount = 0;
1898          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1899            {            {
# Line 1818  for (;;) Line 1909  for (;;)
1909            ncount++;            ncount++;
1910            nptr += ndlen;            nptr += ndlen;
1911            }            }
1912            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1913                reset_could_continue = TRUE;
1914          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1915            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1916          else          else
1917            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1918          }          }
# Line 1832  for (;;) Line 1925  for (;;)
1925        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1926        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1927        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1928          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1929        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1930        if (clen > 0)        if (clen > 0)
1931          {          {
# Line 1859  for (;;) Line 1952  for (;;)
1952              next_active_state--;              next_active_state--;
1953              }              }
1954            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1955              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1956            else            else
1957              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1958            break;            break;
# Line 1876  for (;;) Line 1969  for (;;)
1969        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1970        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1971        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1972          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1973        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1974        if (clen > 0)        if (clen > 0)
1975          {          {
# Line 1905  for (;;) Line 1998  for (;;)
1998              next_active_state--;              next_active_state--;
1999              }              }
2000            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2001              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2002            else            else
2003              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
2004            }            }
# Line 1918  for (;;) Line 2011  for (;;)
2011        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2012        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2013        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2014          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2015        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2016        if (clen > 0)        if (clen > 0)
2017          {          {
# Line 1960  for (;;) Line 2053  for (;;)
2053              next_active_state--;              next_active_state--;
2054              }              }
2055            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2056              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2057            else            else
2058              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
2059            }            }
# Line 1982  for (;;) Line 2075  for (;;)
2075        case OP_CHARI:        case OP_CHARI:
2076        if (clen == 0) break;        if (clen == 0) break;
2077    
2078  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2079        if (utf8)        if (utf)
2080          {          {
2081          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2082            {            {
2083            unsigned int othercase;            unsigned int othercase;
2084            if (c < 128) othercase = fcc[c]; else            if (c < 128)
2085                othercase = fcc[c];
2086            /* If we have Unicode property support, we can use it to test the            else
2087            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2088                other case of the character. */
2089  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2090            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2091  #else  #else
2092            othercase = NOTACHAR;              othercase = NOTACHAR;
2093  #endif  #endif
2094    
2095            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2096            }            }
2097          }          }
2098        else        else
2099  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2100          /* Not UTF mode */
       /* Non-UTF-8 mode */  
2101          {          {
2102          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2103              { ADD_NEW(state_offset + 2, 0); }
2104          }          }
2105        break;        break;
2106    
# Line 2021  for (;;) Line 2114  for (;;)
2114        case OP_EXTUNI:        case OP_EXTUNI:
2115        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
2116          {          {
2117          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2118          int ncount = 0;          int ncount = 0;
2119          while (nptr < end_subject)          while (nptr < end_subject)
2120            {            {
# Line 2031  for (;;) Line 2124  for (;;)
2124            ncount++;            ncount++;
2125            nptr += nclen;            nptr += nclen;
2126            }            }
2127            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2128                reset_could_continue = TRUE;
2129          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2130          }          }
2131        break;        break;
# Line 2056  for (;;) Line 2151  for (;;)
2151          break;          break;
2152    
2153          case 0x000d:          case 0x000d:
2154          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2155              {
2156              ADD_NEW(state_offset + 1, 0);
2157              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2158                reset_could_continue = TRUE;
2159              }
2160            else if (ptr[1] == 0x0a)
2161            {            {
2162            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2163            }            }
# Line 2165  for (;;) Line 2266  for (;;)
2266        break;        break;
2267    
2268        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2269        /* Match a negated single character casefully. This is only used for        /* Match a negated single character casefully. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2270    
2271        case OP_NOT:        case OP_NOT:
2272        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }        if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2273        break;        break;
2274    
2275        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2276        /* Match a negated single character caselessly. This is only used for        /* Match a negated single character caselessly. */
       one-byte characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2277    
2278        case OP_NOTI:        case OP_NOTI:
2279        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0)
2280          { ADD_NEW(state_offset + dlen + 1, 0); }          {
2281            unsigned int otherd;
2282    #ifdef SUPPORT_UTF
2283            if (utf && d >= 128)
2284              {
2285    #ifdef SUPPORT_UCP
2286              otherd = UCD_OTHERCASE(d);
2287    #endif  /* SUPPORT_UCP */
2288              }
2289            else
2290    #endif  /* SUPPORT_UTF */
2291            otherd = TABLE_GET(d, fcc, d);
2292            if (c != d && c != otherd)
2293              { ADD_NEW(state_offset + dlen + 1, 0); }
2294            }
2295        break;        break;
2296    
2297        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 2192  for (;;) Line 2303  for (;;)
2303        case OP_NOTPOSPLUSI:        case OP_NOTPOSPLUSI:
2304        caseless = TRUE;        caseless = TRUE;
2305        codevalue -= OP_STARI - OP_STAR;        codevalue -= OP_STARI - OP_STAR;
2306    
2307        /* Fall through */        /* Fall through */
2308        case OP_PLUS:        case OP_PLUS:
2309        case OP_MINPLUS:        case OP_MINPLUS:
# Line 2207  for (;;) Line 2318  for (;;)
2318          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2319          if (caseless)          if (caseless)
2320            {            {
2321  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2322            if (utf8 && d >= 128)            if (utf && d >= 128)
2323              {              {
2324  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2325              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2326  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2327              }              }
2328            else            else
2329  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2330            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2331            }            }
2332          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2333            {            {
# Line 2254  for (;;) Line 2365  for (;;)
2365          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2366          if (caseless)          if (caseless)
2367            {            {
2368  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2369            if (utf8 && d >= 128)            if (utf && d >= 128)
2370              {              {
2371  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2372              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2373  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2374              }              }
2375            else            else
2376  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2377            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2378            }            }
2379          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2380            {            {
# Line 2299  for (;;) Line 2410  for (;;)
2410          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2411          if (caseless)          if (caseless)
2412            {            {
2413  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2414            if (utf8 && d >= 128)            if (utf && d >= 128)
2415              {              {
2416  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2417              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2418  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2419              }              }
2420            else            else
2421  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2422            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2423            }            }
2424          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2425            {            {
# Line 2336  for (;;) Line 2447  for (;;)
2447          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2448          if (caseless)          if (caseless)
2449            {            {
2450  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2451            if (utf8 && d >= 128)            if (utf && d >= 128)
2452              {              {
2453  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2454              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2455  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2456              }              }
2457            else            else
2458  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2459            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2460            }            }
2461          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2462            {            {
2463            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2464              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2465            else            else
2466              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2467            }            }
# Line 2373  for (;;) Line 2484  for (;;)
2484        case OP_NOTUPTO:        case OP_NOTUPTO:
2485        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2486        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2487        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2488        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2489        if (clen > 0)        if (clen > 0)
2490          {          {
2491          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2492          if (caseless)          if (caseless)
2493            {            {
2494  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2495            if (utf8 && d >= 128)            if (utf && d >= 128)
2496              {              {
2497  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2498              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2499  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2500              }              }
2501            else            else
2502  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2503            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2504            }            }
2505          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2506            {            {
# Line 2399  for (;;) Line 2510  for (;;)
2510              next_active_state--;              next_active_state--;
2511              }              }
2512            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2513              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2514            else            else
2515              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2516            }            }
# Line 2416  for (;;) Line 2527  for (;;)
2527          {          {
2528          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2529          int next_state_offset;          int next_state_offset;
2530          const uschar *ecode;          const pcre_uchar *ecode;
2531    
2532          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2533          can set isinclass from it. */          can set isinclass from it. */
2534    
2535          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2536            {            {
2537            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2538            if (clen > 0)            if (clen > 0)
2539              {              {
2540              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2541                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2542              }              }
2543            }            }
2544    
# Line 2438  for (;;) Line 2549  for (;;)
2549          else          else
2550           {           {
2551           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2552           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2553           }           }
2554    
2555          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
# Line 2472  for (;;) Line 2583  for (;;)
2583            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2584            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2585            if (count >= GET2(ecode, 1))            if (count >= GET2(ecode, 1))
2586              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2587            if (isinclass)            if (isinclass)
2588              {              {
2589              int max = GET2(ecode, 3);              int max = GET2(ecode, 1 + IMM2_SIZE);
2590              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2591                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2592              else              else
2593                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
2594              }              }
# Line 2508  for (;;) Line 2619  for (;;)
2619          int rc;          int rc;
2620          int local_offsets[2];          int local_offsets[2];
2621          int local_workspace[1000];          int local_workspace[1000];
2622          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2623    
2624          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2625    
# Line 2545  for (;;) Line 2656  for (;;)
2656          if (code[LINK_SIZE+1] == OP_CALLOUT)          if (code[LINK_SIZE+1] == OP_CALLOUT)
2657            {            {
2658            rrc = 0;            rrc = 0;
2659            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2660              {              {
2661              pcre_callout_block cb;              PUBL(callout_block) cb;
2662              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2663              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2664              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2665    #ifdef COMPILE_PCRE8
2666              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2667    #else
2668                cb.subject          = (PCRE_SPTR16)start_subject;
2669    #endif
2670              cb.subject_length   = (int)(end_subject - start_subject);              cb.subject_length   = (int)(end_subject - start_subject);
2671              cb.start_match      = (int)(current_subject - start_subject);              cb.start_match      = (int)(current_subject - start_subject);
2672              cb.current_position = (int)(ptr - start_subject);              cb.current_position = (int)(ptr - start_subject);
# Line 2560  for (;;) Line 2675  for (;;)
2675              cb.capture_top      = 1;              cb.capture_top      = 1;
2676              cb.capture_last     = -1;              cb.capture_last     = -1;
2677              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2678              cb.mark             = NULL;   /* No (*MARK) support */              cb.mark             = NULL;   /* No (*MARK) support */
2679              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2680              }              }
2681            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2682            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2683            }            }
2684    
2685          condcode = code[LINK_SIZE+1];          condcode = code[LINK_SIZE+1];
# Line 2585  for (;;) Line 2700  for (;;)
2700    
2701          else if (condcode == OP_RREF || condcode == OP_NRREF)          else if (condcode == OP_RREF || condcode == OP_NRREF)
2702            {            {
2703            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2704            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2705            if (md->recursive != NULL)            if (md->recursive != NULL)
2706              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2707            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2708            }            }
2709    
# Line 2597  for (;;) Line 2712  for (;;)
2712          else          else
2713            {            {
2714            int rc;            int rc;
2715            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2716            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2717    
2718            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2719    
# Line 2626  for (;;) Line 2741  for (;;)
2741        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2742        case OP_RECURSE:        case OP_RECURSE:
2743          {          {
2744          dfa_recursion_info *ri;          dfa_recursion_info *ri;
2745          int local_offsets[1000];          int local_offsets[1000];
2746          int local_workspace[1000];          int local_workspace[1000];
2747          const uschar *callpat = start_code + GET(code, 1);          const pcre_uchar *callpat = start_code + GET(code, 1);
2748          int recno = (callpat == md->start_code)? 0 :          int recno = (callpat == md->start_code)? 0 :
2749            GET2(callpat, 1 + LINK_SIZE);            GET2(callpat, 1 + LINK_SIZE);
2750          int rc;          int rc;
2751    
2752          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2753    
2754          /* Check for repeating a recursion without advancing the subject          /* Check for repeating a recursion without advancing the subject
2755          pointer. This should catch convoluted mutual recursions. (Some simple          pointer. This should catch convoluted mutual recursions. (Some simple
2756          cases are caught at compile time.) */          cases are caught at compile time.) */
   
         for (ri = md->recursive; ri != NULL; ri = ri->prevrec)  
           if (recno == ri->group_num && ptr == ri->subject_position)  
             return PCRE_ERROR_RECURSELOOP;  
2757    
2758          /* Remember this recursion and where we started it so as to          for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2759              if (recno == ri->group_num && ptr == ri->subject_position)
2760                return PCRE_ERROR_RECURSELOOP;
2761    
2762            /* Remember this recursion and where we started it so as to
2763          catch infinite loops. */          catch infinite loops. */
2764    
2765          new_recursive.group_num = recno;          new_recursive.group_num = recno;
2766          new_recursive.subject_position = ptr;          new_recursive.subject_position = ptr;
2767          new_recursive.prevrec = md->recursive;          new_recursive.prevrec = md->recursive;
2768          md->recursive = &new_recursive;          md->recursive = &new_recursive;
2769    
2770          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2771            md,                                   /* fixed match data */            md,                                   /* fixed match data */
# Line 2665  for (;;) Line 2780  for (;;)
2780    
2781          md->recursive = new_recursive.prevrec;  /* Done this recursion */          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2782    
2783          DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,          DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2784            rc));            rc));
2785    
2786          /* Ran out of internal offsets */          /* Ran out of internal offsets */
# Line 2680  for (;;) Line 2795  for (;;)
2795            {            {
2796            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2797              {              {
             const uschar *p = start_subject + local_offsets[rc];  
             const uschar *pp = start_subject + local_offsets[rc+1];  
2798              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2799              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2800                if (utf)
2801                  {
2802                  const pcre_uchar *p = start_subject + local_offsets[rc];
2803                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2804                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2805                  }
2806    #endif
2807              if (charcount > 0)              if (charcount > 0)
2808                {                {
2809                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
# Line 2703  for (;;) Line 2823  for (;;)
2823        case OP_SBRAPOS:        case OP_SBRAPOS:
2824        case OP_CBRAPOS:        case OP_CBRAPOS:
2825        case OP_SCBRAPOS:        case OP_SCBRAPOS:
2826        case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
2827          {          {
2828          int charcount, matched_count;          int charcount, matched_count;
2829          const uschar *local_ptr = ptr;          const pcre_uchar *local_ptr = ptr;
2830          BOOL allow_zero;          BOOL allow_zero;
2831    
2832          if (codevalue == OP_BRAPOSZERO)          if (codevalue == OP_BRAPOSZERO)
2833            {            {
2834            allow_zero = TRUE;            allow_zero = TRUE;
2835            codevalue = *(++code);  /* Codevalue will be one of above BRAs */            codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2836            }            }
2837          else allow_zero = FALSE;          else allow_zero = FALSE;
2838    
2839          /* Loop to match the subpattern as many times as possible as if it were          /* Loop to match the subpattern as many times as possible as if it were
2840          a complete pattern. */          a complete pattern. */
2841    
2842          for (matched_count = 0;; matched_count++)          for (matched_count = 0;; matched_count++)
2843            {            {
2844            int local_offsets[2];            int local_offsets[2];
2845            int local_workspace[1000];            int local_workspace[1000];
2846    
2847            int rc = internal_dfa_exec(            int rc = internal_dfa_exec(
2848              md,                                   /* fixed match data */              md,                                   /* fixed match data */
2849              code,                                 /* this subexpression's code */              code,                                 /* this subexpression's code */
# Line 2734  for (;;) Line 2854  for (;;)
2854              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2855              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2856              rlevel);                              /* function recursion level */              rlevel);                              /* function recursion level */
2857    
2858            /* Failed to match */            /* Failed to match */
2859    
2860            if (rc < 0)            if (rc < 0)
2861              {              {
2862              if (rc != PCRE_ERROR_NOMATCH) return rc;              if (rc != PCRE_ERROR_NOMATCH) return rc;
2863              break;              break;
2864              }              }
2865    
2866            /* Matched: break the loop if zero characters matched. */            /* Matched: break the loop if zero characters matched. */
2867    
2868            charcount = local_offsets[1] - local_offsets[0];            charcount = local_offsets[1] - local_offsets[0];
2869            if (charcount == 0) break;            if (charcount == 0) break;
2870            local_ptr += charcount;    /* Advance temporary position ptr */            local_ptr += charcount;    /* Advance temporary position ptr */
2871            }            }
2872    
2873          /* At this point we have matched the subpattern matched_count          /* At this point we have matched the subpattern matched_count
2874          times, and local_ptr is pointing to the character after the end of the          times, and local_ptr is pointing to the character after the end of the
2875          last match. */          last match. */
2876    
2877          if (matched_count > 0 || allow_zero)          if (matched_count > 0 || allow_zero)
2878            {            {
2879            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2880            int next_state_offset;            int next_state_offset;
2881    
2882            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2883              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
2884            next_state_offset =            next_state_offset =
# Line 2777  for (;;) Line 2897  for (;;)
2897              }              }
2898            else            else
2899              {              {
2900              const uschar *p = ptr;              const pcre_uchar *p = ptr;
2901              const uschar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2902              charcount = pp - p;              charcount = (int)(pp - p);
2903              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2904                if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2905    #endif
2906              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2907              }              }
2908            }            }
2909          }          }
2910        break;        break;
2911    
2912        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2913        case OP_ONCE:        case OP_ONCE:
2914          case OP_ONCE_NC:
2915          {          {
2916          int local_offsets[2];          int local_offsets[2];
2917          int local_workspace[1000];          int local_workspace[1000];
# Line 2806  for (;;) Line 2929  for (;;)
2929    
2930          if (rc >= 0)          if (rc >= 0)
2931            {            {
2932            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2933            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2934            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2935    
# Line 2859  for (;;) Line 2982  for (;;)
2982              }              }
2983            else            else
2984              {              {
2985              const uschar *p = start_subject + local_offsets[0];  #ifdef SUPPORT_UTF
2986              const uschar *pp = start_subject + local_offsets[1];              if (utf)
2987              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;                {
2988                  const pcre_uchar *p = start_subject + local_offsets[0];
2989                  const pcre_uchar *pp = start_subject + local_offsets[1];
2990                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2991                  }
2992    #endif
2993              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2994              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
2995                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
# Line 2877  for (;;) Line 3005  for (;;)
3005    
3006        case OP_CALLOUT:        case OP_CALLOUT:
3007        rrc = 0;        rrc = 0;
3008        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
3009          {          {
3010          pcre_callout_block cb;          PUBL(callout_block) cb;
3011          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
3012          cb.callout_number   = code[1];          cb.callout_number   = code[1];
3013          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
3014    #ifdef COMPILE_PCRE8
3015          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
3016    #else
3017            cb.subject          = (PCRE_SPTR16)start_subject;
3018    #endif
3019          cb.subject_length   = (int)(end_subject - start_subject);          cb.subject_length   = (int)(end_subject - start_subject);
3020          cb.start_match      = (int)(current_subject - start_subject);          cb.start_match      = (int)(current_subject - start_subject);
3021          cb.current_position = (int)(ptr - start_subject);          cb.current_position = (int)(ptr - start_subject);
# Line 2892  for (;;) Line 3024  for (;;)
3024          cb.capture_top      = 1;          cb.capture_top      = 1;
3025          cb.capture_last     = -1;          cb.capture_last     = -1;
3026          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
3027          cb.mark             = NULL;   /* No (*MARK) support */          cb.mark             = NULL;   /* No (*MARK) support */
3028          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
3029          }          }
3030        if (rrc == 0)        if (rrc == 0)
3031          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3032        break;        break;
3033    
3034    
# Line 2925  for (;;) Line 3057  for (;;)
3057    if (new_count <= 0)    if (new_count <= 0)
3058      {      {
3059      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3060          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3061          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3062          (                                            /* either... */          (                                            /* either... */
3063          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2933  for (;;) Line 3065  for (;;)
3065          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3066           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3067          ) &&                                         /* And... */          ) &&                                         /* And... */
3068          ptr >= end_subject &&                  /* Reached end of subject */          (
3069          ptr > md->start_used_ptr)              /* Inspected non-empty string */          partial_newline ||                           /* Either partial NL */
3070              (                                          /* or ... */
3071              ptr >= end_subject &&                /* End of subject and */
3072              ptr > md->start_used_ptr)            /* Inspected non-empty string */
3073              )
3074            )
3075        {        {
3076        if (offsetcount >= 2)        if (offsetcount >= 2)
3077          {          {
# Line 2993  Returns:          > 0 => number of match Line 3130  Returns:          > 0 => number of match
3130                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3131  */  */
3132    
3133    #ifdef COMPILE_PCRE8
3134  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3135  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3136    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3137    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3138    #else
3139    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3140    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3141      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3142      int offsetcount, int *workspace, int wscount)
3143    #endif
3144  {  {
3145  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3146  dfa_match_data match_block;  dfa_match_data match_block;
3147  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3148  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3149  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;
   
 pcre_study_data internal_study;  
3150  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
 real_pcre internal_re;  
3151    
3152  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3153  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3154  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3155  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3156  int first_byte = -1;  pcre_uchar first_char = 0;
3157  int req_byte = -1;  pcre_uchar first_char2 = 0;
3158  int req_byte2 = -1;  pcre_uchar req_char = 0;
3159    pcre_uchar req_char2 = 0;
3160  int newline;  int newline;
3161    
3162  /* Plausibility checks */  /* Plausibility checks */
# Line 3026  if (offsetcount < 0) return PCRE_ERROR_B Line 3168  if (offsetcount < 0) return PCRE_ERROR_B
3168  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3169  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3170    
3171  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3172  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3173  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3174  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3175    
3176    if (re->magic_number != MAGIC_NUMBER)
3177      return re->magic_number == REVERSED_MAGIC_NUMBER?
3178        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3179    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3180    
3181    /* If restarting after a partial match, do some sanity checks on the contents
3182    of the workspace. */
3183    
3184    if ((options & PCRE_DFA_RESTART) != 0)
3185      {
3186      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3187        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3188          return PCRE_ERROR_DFA_BADRESTART;
3189      }
3190    
3191    /* Set up study, callout, and table data */
3192    
3193  md->tables = re->tables;  md->tables = re->tables;
3194  md->callout_data = NULL;  md->callout_data = NULL;
# Line 3048  if (extra_data != NULL) Line 3207  if (extra_data != NULL)
3207      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3208    }    }
3209    
 /* Check that the first field in the block is the magic number. If it is not,  
 test for a regex that was compiled on a host of opposite endianness. If this is  
 the case, flipped values are put in internal_re and internal_study if there was  
 study data too. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   {  
   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);  
   if (re == NULL) return PCRE_ERROR_BADMAGIC;  
   if (study != NULL) study = &internal_study;  
   }  
   
3210  /* Set some local values */  /* Set some local values */
3211    
3212  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3213  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3214  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3215    
3216  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3217  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3218    utf = (re->options & PCRE_UTF8) != 0;
3219  #else  #else
3220  utf8 = FALSE;  utf = FALSE;
3221  #endif  #endif
3222    
3223  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 3077  anchored = (options & (PCRE_ANCHORED|PCR Line 3225  anchored = (options & (PCRE_ANCHORED|PCR
3225    
3226  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3227    
3228  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3229      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3230  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3231  md->end_subject = end_subject;  md->end_subject = end_subject;
3232  md->start_offset = start_offset;  md->start_offset = start_offset;
3233  md->moptions = options;  md->moptions = options;
# Line 3140  else Line 3288  else
3288  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3289  back the character offset. */  back the character offset. */
3290    
3291  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3292  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3293    {    {
3294    int erroroffset;    int erroroffset;
3295    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3296    if (errorcode != 0)    if (errorcode != 0)
3297      {      {
3298      if (offsetcount >= 2)      if (offsetcount >= 2)
3299        {        {
3300        offsets[0] = erroroffset;        offsets[0] = erroroffset;
3301        offsets[1] = errorcode;        offsets[1] = errorcode;
3302        }        }
3303      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
3304        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3305      }      }
3306    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3307          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3308      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
3309    }    }
3310  #endif  #endif
# Line 3165  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3313  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3313  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3314  in other programs later. */  in other programs later. */
3315    
3316  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3317    
3318  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3319  used in a loop when finding where to start. */  where to start. */
3320    
 lcc = md->tables + lcc_offset;  
3321  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3322  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3323    
# Line 3184  if (!anchored) Line 3331  if (!anchored)
3331    {    {
3332    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3333      {      {
3334      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3335      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
3336        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3337          {
3338          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3339    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3340          if (utf && first_char > 127)
3341            first_char2 = UCD_OTHERCASE(first_char);
3342    #endif
3343          }
3344      }      }
3345    else    else
3346      {      {
# Line 3201  character" set. */ Line 3355  character" set. */
3355    
3356  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3357    {    {
3358    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3359    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3360    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3361        {
3362        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3363    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3364        if (utf && req_char > 127)
3365          req_char2 = UCD_OTHERCASE(req_char);
3366    #endif
3367        }
3368    }    }
3369    
3370  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 3216  for (;;) Line 3377  for (;;)
3377    
3378    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3379      {      {
3380      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3381    
3382      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3383      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 3225  for (;;) Line 3386  for (;;)
3386    
3387      if (firstline)      if (firstline)
3388        {        {
3389        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3390  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3391        if (utf8)        if (utf)
3392          {          {
3393          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3394            {            {
3395            t++;            t++;
3396            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3397            }            }
3398          }          }
3399        else        else
# Line 3249  for (;;) Line 3410  for (;;)
3410    
3411      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3412        {        {
3413        /* Advance to a known first byte. */        /* Advance to a known first char. */
3414    
3415        if (first_byte >= 0)        if (has_first_char)
3416          {          {
3417          if (first_byte_caseless)          if (first_char != first_char2)
3418            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3419                   lcc[*current_subject] != first_byte)                *current_subject != first_char && *current_subject != first_char2)
3420              current_subject++;              current_subject++;
3421          else          else
3422            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3423                   *current_subject != first_byte)                   *current_subject != first_char)
3424              current_subject++;              current_subject++;
3425          }          }
3426    
# Line 3269  for (;;) Line 3430  for (;;)
3430          {          {
3431          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3432            {            {
3433  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3434            if (utf8)            if (utf)
3435              {              {
3436              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3437                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3438                {                {
3439                current_subject++;                current_subject++;
3440                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3441                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
                 current_subject++;  
3442                }                }
3443              }              }
3444            else            else
# Line 3305  for (;;) Line 3465  for (;;)
3465          while (current_subject < end_subject)          while (current_subject < end_subject)
3466            {            {
3467            register unsigned int c = *current_subject;            register unsigned int c = *current_subject;
3468    #ifndef COMPILE_PCRE8
3469              if (c > 255) c = 255;
3470    #endif
3471            if ((start_bits[c/8] & (1 << (c&7))) == 0)            if ((start_bits[c/8] & (1 << (c&7))) == 0)
3472              {              {
3473              current_subject++;              current_subject++;
3474  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3475              if (utf8)              /* In non 8-bit mode, the iteration will stop for
3476                while(current_subject < end_subject &&              characters > 255 at the beginning or not stop at all. */
3477                      (*current_subject & 0xc0) == 0x80) current_subject++;              if (utf)
3478                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3479                    current_subject++);
3480  #endif  #endif
3481              }              }
3482            else break;            else break;
# Line 3327  for (;;) Line 3492  for (;;)
3492      disabling is explicitly requested (and of course, by the test above, this      disabling is explicitly requested (and of course, by the test above, this
3493      code is not obeyed when restarting after a partial match). */      code is not obeyed when restarting after a partial match). */
3494    
3495      if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3496          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3497        {        {
3498        /* If the pattern was studied, a minimum subject length may be set. This        /* If the pattern was studied, a minimum subject length may be set. This
# Line 3339  for (;;) Line 3504  for (;;)
3504            (pcre_uint32)(end_subject - current_subject) < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3505          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3506    
3507        /* If req_byte is set, we know that that character must appear in the        /* If req_char is set, we know that that character must appear in the
3508        subject for the match to succeed. If the first character is set, req_byte        subject for the match to succeed. If the first character is set, req_char
3509        must be later in the subject; otherwise the test starts at the match        must be later in the subject; otherwise the test starts at the match
3510        point. This optimization can save a huge amount of work in patterns with        point. This optimization can save a huge amount of work in patterns with
3511        nested unlimited repeats that aren't going to match. Writing separate        nested unlimited repeats that aren't going to match. Writing separate
# Line 3352  for (;;) Line 3517  for (;;)
3517        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3518        string... so we don't do this when the string is sufficiently long. */        string... so we don't do this when the string is sufficiently long. */
3519    
3520        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3521          {          {
3522          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3523    
3524          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3525          place we found it at last time. */          place we found it at last time. */
3526    
3527          if (p > req_byte_ptr)          if (p > req_char_ptr)
3528            {            {
3529            if (req_byte_caseless)            if (req_char != req_char2)
3530              {              {
3531              while (p < end_subject)              while (p < end_subject)
3532                {                {
3533                register int pp = *p++;                register int pp = *p++;
3534                if (pp == req_byte || pp == req_byte2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3535                }                }
3536              }              }
3537            else            else
3538              {              {
3539              while (p < end_subject)              while (p < end_subject)
3540                {                {
3541                if (*p++ == req_byte) { p--; break; }                if (*p++ == req_char) { p--; break; }
3542                }                }
3543              }              }
3544    
# Line 3386  for (;;) Line 3551  for (;;)
3551            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3552            the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3553    
3554            req_byte_ptr = p;            req_char_ptr = p;
3555            }            }
3556          }          }
3557        }        }
# Line 3395  for (;;) Line 3560  for (;;)
3560    /* OK, now we can do the business */    /* OK, now we can do the business */
3561    
3562    md->start_used_ptr = current_subject;    md->start_used_ptr = current_subject;
3563    md->recursive = NULL;    md->recursive = NULL;
3564    
3565    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3566      md,                                /* fixed match data */      md,                                /* fixed match data */
# Line 3418  for (;;) Line 3583  for (;;)
3583    
3584    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3585    current_subject++;    current_subject++;
3586    if (utf8)  #ifdef SUPPORT_UTF
3587      if (utf)
3588      {      {
3589      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3590        current_subject++;        current_subject++);
3591      }      }
3592    #endif
3593    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3594    
3595    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does

Legend:
Removed from v.645  
changed lines
  Added in v.982

  ViewVC Help
Powered by ViewVC 1.1.5