/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 654 by ph10, Tue Aug 2 11:00:40 2011 UTC revision 916 by ph10, Wed Feb 15 09:50:53 2012 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 113  small value. Non-zero values in the tabl Line 113  small value. Non-zero values in the tabl
113  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
114  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
115    
116  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
117    0,                             /* End                                    */    0,                             /* End                                    */
118    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
119    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 128  static const uschar coptable[] = { Line 128  static const uschar coptable[] = {
128    1,                             /* noti                                   */    1,                             /* noti                                   */
129    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
130    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
131    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
132    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
133      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
134    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
135    3, 3, 3,                       /* upto I, minupto I, exact I             */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
136    1, 1, 1, 3,                    /* *+I, ++I, ?+I, upto+I                  */    1+IMM2_SIZE,                   /* exact I                                */
137      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
138    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
139    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
140    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
141    1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
142      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
143    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
144    3, 3, 3,                       /* NOT upto I, minupto I, exact I         */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
145    1, 1, 1, 3,                    /* NOT *+I, ++I, ?+I, upto+I              */    1+IMM2_SIZE,                   /* NOT exact I                            */
146      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
147    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
148    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
149    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
150    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
151      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
152    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
153    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
154    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 164  static const uschar coptable[] = { Line 169  static const uschar coptable[] = {
169    0,                             /* Assert not                             */    0,                             /* Assert not                             */
170    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
171    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
172    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */    0, 0,                          /* ONCE, ONCE_NC                          */
173      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
174    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
175    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
176    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
# Line 181  remember the fact that a character could Line 187  remember the fact that a character could
187  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
188  two tables that follow must also be modified. */  two tables that follow must also be modified. */
189    
190  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
191    0,                             /* End                                    */    0,                             /* End                                    */
192    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
193    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
# Line 232  static const uschar poptable[] = { Line 238  static const uschar poptable[] = {
238    0,                             /* Assert not                             */    0,                             /* Assert not                             */
239    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
240    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
241    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */    0, 0,                          /* ONCE, ONCE_NC                          */
242      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
243    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
244    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
245    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
# Line 247  static const uschar poptable[] = { Line 254  static const uschar poptable[] = {
254  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
255  and \w */  and \w */
256    
257  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
258    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
259    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
260    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 255  static const uschar toptable1[] = { Line 262  static const uschar toptable1[] = {
262    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
263  };  };
264    
265  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
266    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
267    ctype_digit, 0,    ctype_digit, 0,
268    ctype_space, 0,    ctype_space, 0,
# Line 294  Returns:       nothing Line 301  Returns:       nothing
301  */  */
302    
303  static void  static void
304  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
305  {  {
306  int c;  int c;
307  while (length-- > 0)  while (length-- > 0)
# Line 384  for the current character, one for the f Line 391  for the current character, one for the f
391  static int  static int
392  internal_dfa_exec(  internal_dfa_exec(
393    dfa_match_data *md,    dfa_match_data *md,
394    const uschar *this_start_code,    const pcre_uchar *this_start_code,
395    const uschar *current_subject,    const pcre_uchar *current_subject,
396    int start_offset,    int start_offset,
397    int *offsets,    int *offsets,
398    int offsetcount,    int offsetcount,
# Line 396  internal_dfa_exec( Line 403  internal_dfa_exec(
403  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
404  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
405    
406  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
407  const uschar *ptr;  const pcre_uchar *ptr;
408  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
409    
410  dfa_recursion_info new_recursive;  dfa_recursion_info new_recursive;
411    
# Line 407  int active_count, new_count, match_count Line 414  int active_count, new_count, match_count
414  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
415  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
416    
417  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
418  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
419  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
420    
421  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
422  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
423  #else  #else
424  BOOL utf8 = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426    
427    BOOL reset_could_continue = FALSE;
428    
429  rlevel++;  rlevel++;
430  offsetcount &= (-2);  offsetcount &= (-2);
431    
# Line 440  new_count = 0; Line 449  new_count = 0;
449    
450  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
451    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
452      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0);      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
453        ? IMM2_SIZE:0);
454    
455  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
456  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 468  if (*first_op == OP_REVERSE) Line 478  if (*first_op == OP_REVERSE)
478    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
479    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
480    
481  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
482    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
483    
484    if (utf8)    if (utf)
485      {      {
486      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
487        {        {
488        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
489        current_subject--;        current_subject--;
490        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
              (*current_subject & 0xc0) == 0x80)  
         current_subject--;  
491        }        }
492      }      }
493    else    else
# Line 540  else Line 548  else
548      {      {
549      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
550        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
551          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
552          2:0);          ? IMM2_SIZE:0);
553      do      do
554        {        {
555        ADD_NEW((int)(end_code - start_code + length), 0);        ADD_NEW((int)(end_code - start_code + length), 0);
# Line 554  else Line 562  else
562    
563  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
564    
565  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
566    
567  /* Loop for scanning the subject */  /* Loop for scanning the subject */
568    
# Line 565  for (;;) Line 573  for (;;)
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    unsigned int c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
577      BOOL could_continue = reset_could_continue;
578      reset_could_continue = FALSE;
579    
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
582    
# Line 581  for (;;) Line 591  for (;;)
591    
592  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
593    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
594    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
595    printf("\"\n");    printf("\"\n");
596    
597    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 602  for (;;) Line 612  for (;;)
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of bytes in the character */
615  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
616      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
617  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
618      c = *ptr;      c = *ptr;
619      }      }
620    else    else
# Line 622  for (;;) Line 632  for (;;)
632      {      {
633      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
634      BOOL caseless = FALSE;      BOOL caseless = FALSE;
635      const uschar *code;      const pcre_uchar *code;
636      int state_offset = current_state->offset;      int state_offset = current_state->offset;
637      int count, codevalue, rrc;      int count, codevalue, rrc;
638    
# Line 635  for (;;) Line 645  for (;;)
645    
646      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
647      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
648      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
649        state, arrange for it to be passed on. */
650    
651      if (state_offset < 0)      if (state_offset < 0)
652        {        {
# Line 644  for (;;) Line 655  for (;;)
655          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
656          ADD_NEW_DATA(state_offset, current_state->count,          ADD_NEW_DATA(state_offset, current_state->count,
657            current_state->data - 1);            current_state->data - 1);
658            if (could_continue) reset_could_continue = TRUE;
659          continue;          continue;
660          }          }
661        else        else
# Line 691  for (;;) Line 703  for (;;)
703      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
704        {        {
705        dlen = 1;        dlen = 1;
706  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
707        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
708  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
709        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
710        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
711          {          {
# Line 768  for (;;) Line 780  for (;;)
780                  current_subject > start_subject + md->start_offset)))                  current_subject > start_subject + md->start_offset)))
781            {            {
782            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
783              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
784                match_count = 0;                match_count = 0;
785            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
786            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
# Line 814  for (;;) Line 826  for (;;)
826        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
827        case OP_CBRA:        case OP_CBRA:
828        case OP_SCBRA:        case OP_SCBRA:
829        ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE),  0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
830        code += GET(code, 1);        code += GET(code, 1);
831        while (*code == OP_ALT)        while (*code == OP_ALT)
832          {          {
# Line 910  for (;;) Line 922  for (;;)
922                 (ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
923              ))              ))
924            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
925            else if (ptr + 1 >= md->end_subject &&
926                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
927                     NLBLOCK->nltype == NLTYPE_FIXED &&
928                     NLBLOCK->nllen == 2 &&
929                     c == NLBLOCK->nl[0])
930              {
931              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
932                {
933                reset_could_continue = TRUE;
934                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
935                }
936              else could_continue = partial_newline = TRUE;
937              }
938          }          }
939        break;        break;
940    
# Line 922  for (;;) Line 947  for (;;)
947          else if (clen == 0 ||          else if (clen == 0 ||
948              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
949            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
950            else if (ptr + 1 >= md->end_subject &&
951                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
952                     NLBLOCK->nltype == NLTYPE_FIXED &&
953                     NLBLOCK->nllen == 2 &&
954                     c == NLBLOCK->nl[0])
955              {
956              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
957                {
958                reset_could_continue = TRUE;
959                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
960                }
961              else could_continue = partial_newline = TRUE;
962              }
963          }          }
964        else if (IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
965          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 954  for (;;) Line 992  for (;;)
992    
993          if (ptr > start_subject)          if (ptr > start_subject)
994            {            {
995            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
996            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
997  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
998            if (utf8) BACKCHAR(temp);            if (utf) { BACKCHAR(temp); }
999  #endif  #endif
1000            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
1001  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1022  for (;;) Line 1060  for (;;)
1060            break;            break;
1061    
1062            case PT_GC:            case PT_GC:
1063            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1064            break;            break;
1065    
1066            case PT_PC:            case PT_PC:
# Line 1036  for (;;) Line 1074  for (;;)
1074            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1075    
1076            case PT_ALNUM:            case PT_ALNUM:
1077            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1078                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1079            break;            break;
1080    
1081            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1082            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1083                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1084            break;            break;
1085    
1086            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1087            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1088                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1089                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1090            break;            break;
1091    
1092            case PT_WORD:            case PT_WORD:
1093            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1094                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1095                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1096            break;            break;
1097    
# Line 1155  for (;;) Line 1193  for (;;)
1193                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1194            {            {
1195            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1196              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1197            else            else
1198              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1199            }            }
# Line 1166  for (;;) Line 1204  for (;;)
1204        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1205        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1206        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1207        ADD_ACTIVE(state_offset + 4, 0);        ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1208        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1209        if (clen > 0)        if (clen > 0)
1210          {          {
# Line 1181  for (;;) Line 1219  for (;;)
1219              next_active_state--;              next_active_state--;
1220              }              }
1221            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1222              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1223            else            else
1224              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1225            }            }
# Line 1216  for (;;) Line 1254  for (;;)
1254            break;            break;
1255    
1256            case PT_GC:            case PT_GC:
1257            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1258            break;            break;
1259    
1260            case PT_PC:            case PT_PC:
# Line 1230  for (;;) Line 1268  for (;;)
1268            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1269    
1270            case PT_ALNUM:            case PT_ALNUM:
1271            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1272                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1273            break;            break;
1274    
1275            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1276            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1277                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1278            break;            break;
1279    
1280            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1281            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1282                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1283                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1284            break;            break;
1285    
1286            case PT_WORD:            case PT_WORD:
1287            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1288                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1289                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1290            break;            break;
1291    
# Line 1279  for (;;) Line 1317  for (;;)
1317        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1318        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1319          {          {
1320          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1321          int ncount = 0;          int ncount = 0;
1322          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1323            {            {
# Line 1463  for (;;) Line 1501  for (;;)
1501            break;            break;
1502    
1503            case PT_GC:            case PT_GC:
1504            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1505            break;            break;
1506    
1507            case PT_PC:            case PT_PC:
# Line 1477  for (;;) Line 1515  for (;;)
1515            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1516    
1517            case PT_ALNUM:            case PT_ALNUM:
1518            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1519                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1520            break;            break;
1521    
1522            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1523            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1524                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1525            break;            break;
1526    
1527            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1528            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1529                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1530                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1531            break;            break;
1532    
1533            case PT_WORD:            case PT_WORD:
1534            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1535                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1536                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1537            break;            break;
1538    
# Line 1535  for (;;) Line 1573  for (;;)
1573        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1574        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1575          {          {
1576          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1577          int ncount = 0;          int ncount = 0;
1578          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1579              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1717  for (;;) Line 1755  for (;;)
1755        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1756        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1757        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1758          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1759        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1760        if (clen > 0)        if (clen > 0)
1761          {          {
1762          BOOL OK;          BOOL OK;
1763          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1764          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1765            {            {
1766            case PT_ANY:            case PT_ANY:
1767            OK = TRUE;            OK = TRUE;
# Line 1735  for (;;) Line 1773  for (;;)
1773            break;            break;
1774    
1775            case PT_GC:            case PT_GC:
1776            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1777            break;            break;
1778    
1779            case PT_PC:            case PT_PC:
1780            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1781            break;            break;
1782    
1783            case PT_SC:            case PT_SC:
1784            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1785            break;            break;
1786    
1787            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1788    
1789            case PT_ALNUM:            case PT_ALNUM:
1790            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1791                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1792            break;            break;
1793    
1794            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1795            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1796                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1797            break;            break;
1798    
1799            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1800            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1801                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1802                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1803            break;            break;
1804    
1805            case PT_WORD:            case PT_WORD:
1806            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1807                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1808                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1809            break;            break;
1810    
# Line 1785  for (;;) Line 1823  for (;;)
1823              next_active_state--;              next_active_state--;
1824              }              }
1825            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1826              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1827            else            else
1828              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1829            }            }
# Line 1798  for (;;) Line 1836  for (;;)
1836        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1837        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1838        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1839          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1840        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1841        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1842          {          {
1843          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1844          int ncount = 0;          int ncount = 0;
1845          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1846            {            {
# Line 1818  for (;;) Line 1856  for (;;)
1856            ncount++;            ncount++;
1857            nptr += ndlen;            nptr += ndlen;
1858            }            }
1859            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1860                reset_could_continue = TRUE;
1861          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1862            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1863          else          else
1864            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1865          }          }
# Line 1832  for (;;) Line 1872  for (;;)
1872        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1873        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1874        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1875          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1876        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1877        if (clen > 0)        if (clen > 0)
1878          {          {
# Line 1859  for (;;) Line 1899  for (;;)
1899              next_active_state--;              next_active_state--;
1900              }              }
1901            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1902              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1903            else            else
1904              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1905            break;            break;
# Line 1876  for (;;) Line 1916  for (;;)
1916        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1917        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1918        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1919          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1920        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1921        if (clen > 0)        if (clen > 0)
1922          {          {
# Line 1905  for (;;) Line 1945  for (;;)
1945              next_active_state--;              next_active_state--;
1946              }              }
1947            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1948              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1949            else            else
1950              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
1951            }            }
# Line 1918  for (;;) Line 1958  for (;;)
1958        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1959        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1960        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1961          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1962        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1963        if (clen > 0)        if (clen > 0)
1964          {          {
# Line 1960  for (;;) Line 2000  for (;;)
2000              next_active_state--;              next_active_state--;
2001              }              }
2002            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2003              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2004            else            else
2005              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
2006            }            }
# Line 1982  for (;;) Line 2022  for (;;)
2022        case OP_CHARI:        case OP_CHARI:
2023        if (clen == 0) break;        if (clen == 0) break;
2024    
2025  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2026        if (utf8)        if (utf)
2027          {          {
2028          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2029            {            {
2030            unsigned int othercase;            unsigned int othercase;
2031            if (c < 128) othercase = fcc[c]; else            if (c < 128)
2032                othercase = fcc[c];
2033            /* If we have Unicode property support, we can use it to test the            else
2034            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2035                other case of the character. */
2036  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2037            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2038  #else  #else
2039            othercase = NOTACHAR;              othercase = NOTACHAR;
2040  #endif  #endif
2041    
2042            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2043            }            }
2044          }          }
2045        else        else
2046  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2047          /* Not UTF mode */
       /* Non-UTF-8 mode */  
2048          {          {
2049          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2050              { ADD_NEW(state_offset + 2, 0); }
2051          }          }
2052        break;        break;
2053    
# Line 2021  for (;;) Line 2061  for (;;)
2061        case OP_EXTUNI:        case OP_EXTUNI:
2062        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
2063          {          {
2064          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2065          int ncount = 0;          int ncount = 0;
2066          while (nptr < end_subject)          while (nptr < end_subject)
2067            {            {
# Line 2031  for (;;) Line 2071  for (;;)
2071            ncount++;            ncount++;
2072            nptr += nclen;            nptr += nclen;
2073            }            }
2074            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2075                reset_could_continue = TRUE;
2076          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2077          }          }
2078        break;        break;
# Line 2056  for (;;) Line 2098  for (;;)
2098          break;          break;
2099    
2100          case 0x000d:          case 0x000d:
2101          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2102              {
2103              ADD_NEW(state_offset + 1, 0);
2104              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2105                reset_could_continue = TRUE;
2106              }
2107            else if (ptr[1] == 0x0a)
2108            {            {
2109            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2110            }            }
2111          else          else
2112            {            {
2113            ADD_NEW(state_offset + 1, 0);            ADD_NEW(state_offset + 1, 0);
2114            }            }
2115          break;          break;
2116          }          }
2117        break;        break;
# Line 2207  for (;;) Line 2255  for (;;)
2255          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2256          if (caseless)          if (caseless)
2257            {            {
2258  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2259            if (utf8 && d >= 128)            if (utf && d >= 128)
2260              {              {
2261  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2262              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2263  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2264              }              }
2265            else            else
2266  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2267            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2268            }            }
2269          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2270            {            {
# Line 2254  for (;;) Line 2302  for (;;)
2302          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2303          if (caseless)          if (caseless)
2304            {            {
2305  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2306            if (utf8 && d >= 128)            if (utf && d >= 128)
2307              {              {
2308  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2309              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2310  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2311              }              }
2312            else            else
2313  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2314            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2315            }            }
2316          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2317            {            {
# Line 2299  for (;;) Line 2347  for (;;)
2347          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2348          if (caseless)          if (caseless)
2349            {            {
2350  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2351            if (utf8 && d >= 128)            if (utf && d >= 128)
2352              {              {
2353  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2354              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2355  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2356              }              }
2357            else            else
2358  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2359            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2360            }            }
2361          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2362            {            {
# Line 2336  for (;;) Line 2384  for (;;)
2384          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2385          if (caseless)          if (caseless)
2386            {            {
2387  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2388            if (utf8 && d >= 128)            if (utf && d >= 128)
2389              {              {
2390  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2391              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2392  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2393              }              }
2394            else            else
2395  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2396            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2397            }            }
2398          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2399            {            {
2400            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2401              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2402            else            else
2403              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2404            }            }
# Line 2373  for (;;) Line 2421  for (;;)
2421        case OP_NOTUPTO:        case OP_NOTUPTO:
2422        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2423        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2424        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2425        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2426        if (clen > 0)        if (clen > 0)
2427          {          {
2428          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2429          if (caseless)          if (caseless)
2430            {            {
2431  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2432            if (utf8 && d >= 128)            if (utf && d >= 128)
2433              {              {
2434  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2435              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2436  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2437              }              }
2438            else            else
2439  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2440            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2441            }            }
2442          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2443            {            {
# Line 2399  for (;;) Line 2447  for (;;)
2447              next_active_state--;              next_active_state--;
2448              }              }
2449            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2450              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2451            else            else
2452              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2453            }            }
# Line 2416  for (;;) Line 2464  for (;;)
2464          {          {
2465          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2466          int next_state_offset;          int next_state_offset;
2467          const uschar *ecode;          const pcre_uchar *ecode;
2468    
2469          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2470          can set isinclass from it. */          can set isinclass from it. */
2471    
2472          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2473            {            {
2474            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2475            if (clen > 0)            if (clen > 0)
2476              {              {
2477              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2478                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2479              }              }
2480            }            }
2481    
# Line 2438  for (;;) Line 2486  for (;;)
2486          else          else
2487           {           {
2488           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2489           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2490           }           }
2491    
2492          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
# Line 2472  for (;;) Line 2520  for (;;)
2520            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2521            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2522            if (count >= GET2(ecode, 1))            if (count >= GET2(ecode, 1))
2523              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2524            if (isinclass)            if (isinclass)
2525              {              {
2526              int max = GET2(ecode, 3);              int max = GET2(ecode, 1 + IMM2_SIZE);
2527              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2528                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2529              else              else
2530                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
2531              }              }
# Line 2508  for (;;) Line 2556  for (;;)
2556          int rc;          int rc;
2557          int local_offsets[2];          int local_offsets[2];
2558          int local_workspace[1000];          int local_workspace[1000];
2559          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2560    
2561          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2562    
# Line 2545  for (;;) Line 2593  for (;;)
2593          if (code[LINK_SIZE+1] == OP_CALLOUT)          if (code[LINK_SIZE+1] == OP_CALLOUT)
2594            {            {
2595            rrc = 0;            rrc = 0;
2596            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2597              {              {
2598              pcre_callout_block cb;              PUBL(callout_block) cb;
2599              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2600              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2601              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2602    #ifdef COMPILE_PCRE8
2603              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2604    #else
2605                cb.subject          = (PCRE_SPTR16)start_subject;
2606    #endif
2607              cb.subject_length   = (int)(end_subject - start_subject);              cb.subject_length   = (int)(end_subject - start_subject);
2608              cb.start_match      = (int)(current_subject - start_subject);              cb.start_match      = (int)(current_subject - start_subject);
2609              cb.current_position = (int)(ptr - start_subject);              cb.current_position = (int)(ptr - start_subject);
# Line 2561  for (;;) Line 2613  for (;;)
2613              cb.capture_last     = -1;              cb.capture_last     = -1;
2614              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2615              cb.mark             = NULL;   /* No (*MARK) support */              cb.mark             = NULL;   /* No (*MARK) support */
2616              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2617              }              }
2618            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2619            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2620            }            }
2621    
2622          condcode = code[LINK_SIZE+1];          condcode = code[LINK_SIZE+1];
# Line 2585  for (;;) Line 2637  for (;;)
2637    
2638          else if (condcode == OP_RREF || condcode == OP_NRREF)          else if (condcode == OP_RREF || condcode == OP_NRREF)
2639            {            {
2640            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2641            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2642            if (md->recursive != NULL)            if (md->recursive != NULL)
2643              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2644            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2645            }            }
2646    
# Line 2597  for (;;) Line 2649  for (;;)
2649          else          else
2650            {            {
2651            int rc;            int rc;
2652            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2653            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2654    
2655            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2656    
# Line 2629  for (;;) Line 2681  for (;;)
2681          dfa_recursion_info *ri;          dfa_recursion_info *ri;
2682          int local_offsets[1000];          int local_offsets[1000];
2683          int local_workspace[1000];          int local_workspace[1000];
2684          const uschar *callpat = start_code + GET(code, 1);          const pcre_uchar *callpat = start_code + GET(code, 1);
2685          int recno = (callpat == md->start_code)? 0 :          int recno = (callpat == md->start_code)? 0 :
2686            GET2(callpat, 1 + LINK_SIZE);            GET2(callpat, 1 + LINK_SIZE);
2687          int rc;          int rc;
# Line 2680  for (;;) Line 2732  for (;;)
2732            {            {
2733            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2734              {              {
             const uschar *p = start_subject + local_offsets[rc];  
             const uschar *pp = start_subject + local_offsets[rc+1];  
2735              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2736              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2737                const pcre_uchar *p = start_subject + local_offsets[rc];
2738                const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2739                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2740    #endif
2741              if (charcount > 0)              if (charcount > 0)
2742                {                {
2743                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
# Line 2706  for (;;) Line 2760  for (;;)
2760        case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
2761          {          {
2762          int charcount, matched_count;          int charcount, matched_count;
2763          const uschar *local_ptr = ptr;          const pcre_uchar *local_ptr = ptr;
2764          BOOL allow_zero;          BOOL allow_zero;
2765    
2766          if (codevalue == OP_BRAPOSZERO)          if (codevalue == OP_BRAPOSZERO)
# Line 2756  for (;;) Line 2810  for (;;)
2810    
2811          if (matched_count > 0 || allow_zero)          if (matched_count > 0 || allow_zero)
2812            {            {
2813            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2814            int next_state_offset;            int next_state_offset;
2815    
2816            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
# Line 2777  for (;;) Line 2831  for (;;)
2831              }              }
2832            else            else
2833              {              {
2834              const uschar *p = ptr;              const pcre_uchar *p = ptr;
2835              const uschar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2836              charcount = pp - p;              charcount = (int)(pp - p);
2837              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2838                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2839    #endif
2840              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2841              }              }
2842            }            }
# Line 2789  for (;;) Line 2845  for (;;)
2845    
2846        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2847        case OP_ONCE:        case OP_ONCE:
2848          case OP_ONCE_NC:
2849          {          {
2850          int local_offsets[2];          int local_offsets[2];
2851          int local_workspace[1000];          int local_workspace[1000];
# Line 2806  for (;;) Line 2863  for (;;)
2863    
2864          if (rc >= 0)          if (rc >= 0)
2865            {            {
2866            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2867            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2868            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2869    
# Line 2859  for (;;) Line 2916  for (;;)
2916              }              }
2917            else            else
2918              {              {
2919              const uschar *p = start_subject + local_offsets[0];  #ifdef SUPPORT_UTF
2920              const uschar *pp = start_subject + local_offsets[1];              const pcre_uchar *p = start_subject + local_offsets[0];
2921              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;              const pcre_uchar *pp = start_subject + local_offsets[1];
2922                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2923    #endif
2924              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2925              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
2926                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
# Line 2877  for (;;) Line 2936  for (;;)
2936    
2937        case OP_CALLOUT:        case OP_CALLOUT:
2938        rrc = 0;        rrc = 0;
2939        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
2940          {          {
2941          pcre_callout_block cb;          PUBL(callout_block) cb;
2942          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2943          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2944          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2945    #ifdef COMPILE_PCRE8
2946          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2947    #else
2948            cb.subject          = (PCRE_SPTR16)start_subject;
2949    #endif
2950          cb.subject_length   = (int)(end_subject - start_subject);          cb.subject_length   = (int)(end_subject - start_subject);
2951          cb.start_match      = (int)(current_subject - start_subject);          cb.start_match      = (int)(current_subject - start_subject);
2952          cb.current_position = (int)(ptr - start_subject);          cb.current_position = (int)(ptr - start_subject);
# Line 2893  for (;;) Line 2956  for (;;)
2956          cb.capture_last     = -1;          cb.capture_last     = -1;
2957          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2958          cb.mark             = NULL;   /* No (*MARK) support */          cb.mark             = NULL;   /* No (*MARK) support */
2959          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2960          }          }
2961        if (rrc == 0)        if (rrc == 0)
2962          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
2963        break;        break;
2964    
2965    
# Line 2921  for (;;) Line 2984  for (;;)
2984    
2985    The "could_continue" variable is true if a state could have continued but    The "could_continue" variable is true if a state could have continued but
2986    for the fact that the end of the subject was reached. */    for the fact that the end of the subject was reached. */
2987    
2988    if (new_count <= 0)    if (new_count <= 0)
2989      {      {
2990      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
# Line 2933  for (;;) Line 2996  for (;;)
2996          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
2997           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
2998          ) &&                                         /* And... */          ) &&                                         /* And... */
2999          ptr >= end_subject &&                  /* Reached end of subject */          (
3000            ptr >= end_subject ||                  /* Reached end of subject or */
3001            partial_newline                        /* a partial newline */
3002            ) &&
3003          ptr > md->start_used_ptr)              /* Inspected non-empty string */          ptr > md->start_used_ptr)              /* Inspected non-empty string */
3004        {        {
3005        if (offsetcount >= 2)        if (offsetcount >= 2)
# Line 2993  Returns:          > 0 => number of match Line 3059  Returns:          > 0 => number of match
3059                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3060  */  */
3061    
3062    #ifdef COMPILE_PCRE8
3063  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3064  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3065    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3066    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3067    #else
3068    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3069    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3070      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3071      int offsetcount, int *workspace, int wscount)
3072    #endif
3073  {  {
3074  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3075  dfa_match_data match_block;  dfa_match_data match_block;
3076  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3077  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3078  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;
   
 pcre_study_data internal_study;  
3079  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
 real_pcre internal_re;  
3080    
3081  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3082  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3083  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3084  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3085  int first_byte = -1;  pcre_uchar first_char = 0;
3086  int req_byte = -1;  pcre_uchar first_char2 = 0;
3087  int req_byte2 = -1;  pcre_uchar req_char = 0;
3088    pcre_uchar req_char2 = 0;
3089  int newline;  int newline;
3090    
3091  /* Plausibility checks */  /* Plausibility checks */
# Line 3049  if (extra_data != NULL) Line 3120  if (extra_data != NULL)
3120    }    }
3121    
3122  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
3123  test for a regex that was compiled on a host of opposite endianness. If this is  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3124  the case, flipped values are put in internal_re and internal_study if there was  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3125  study data too. */  means that the pattern is likely compiled with different endianness. */
3126    
3127  if (re->magic_number != MAGIC_NUMBER)  if (re->magic_number != MAGIC_NUMBER)
3128    {    return re->magic_number == REVERSED_MAGIC_NUMBER?
3129    re = _pcre_try_flipped(re, &internal_re, study, &internal_study);      PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3130    if (re == NULL) return PCRE_ERROR_BADMAGIC;  if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
   if (study != NULL) study = &internal_study;  
   }  
3131    
3132  /* Set some local values */  /* Set some local values */
3133    
3134  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3135  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3136  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3137    
3138  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3139  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3140    utf = (re->options & PCRE_UTF8) != 0;
3141  #else  #else
3142  utf8 = FALSE;  utf = FALSE;
3143  #endif  #endif
3144    
3145  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 3077  anchored = (options & (PCRE_ANCHORED|PCR Line 3147  anchored = (options & (PCRE_ANCHORED|PCR
3147    
3148  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3149    
3150  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3151      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3152  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3153  md->end_subject = end_subject;  md->end_subject = end_subject;
3154  md->start_offset = start_offset;  md->start_offset = start_offset;
3155  md->moptions = options;  md->moptions = options;
# Line 3140  else Line 3210  else
3210  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3211  back the character offset. */  back the character offset. */
3212    
3213  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3214  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3215    {    {
3216    int erroroffset;    int erroroffset;
3217    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3218    if (errorcode != 0)    if (errorcode != 0)
3219      {      {
3220      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 3156  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3226  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3226        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3227      }      }
3228    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3229          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3230      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
3231    }    }
3232  #endif  #endif
# Line 3165  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3235  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3235  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3236  in other programs later. */  in other programs later. */
3237    
3238  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3239    
3240  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3241  used in a loop when finding where to start. */  where to start. */
3242    
 lcc = md->tables + lcc_offset;  
3243  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3244  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3245    
# Line 3184  if (!anchored) Line 3253  if (!anchored)
3253    {    {
3254    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3255      {      {
3256      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3257      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
3258        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3259          {
3260          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3261    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3262          if (utf && first_char > 127)
3263            first_char2 = UCD_OTHERCASE(first_char);
3264    #endif
3265          }
3266      }      }
3267    else    else
3268      {      {
# Line 3201  character" set. */ Line 3277  character" set. */
3277    
3278  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3279    {    {
3280    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3281    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3282    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3283        {
3284        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3285    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3286        if (utf && req_char > 127)
3287          req_char2 = UCD_OTHERCASE(req_char);
3288    #endif
3289        }
3290    }    }
3291    
3292  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 3216  for (;;) Line 3299  for (;;)
3299    
3300    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3301      {      {
3302      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3303    
3304      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3305      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 3225  for (;;) Line 3308  for (;;)
3308    
3309      if (firstline)      if (firstline)
3310        {        {
3311        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3312  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3313        if (utf8)        if (utf)
3314          {          {
3315          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3316            {            {
3317            t++;            t++;
3318            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3319            }            }
3320          }          }
3321        else        else
# Line 3249  for (;;) Line 3332  for (;;)
3332    
3333      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3334        {        {
3335        /* Advance to a known first byte. */        /* Advance to a known first char. */
3336    
3337        if (first_byte >= 0)        if (has_first_char)
3338          {          {
3339          if (first_byte_caseless)          if (first_char != first_char2)
3340            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3341                   lcc[*current_subject] != first_byte)                *current_subject != first_char && *current_subject != first_char2)
3342              current_subject++;              current_subject++;
3343          else          else
3344            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3345                   *current_subject != first_byte)                   *current_subject != first_char)
3346              current_subject++;              current_subject++;
3347          }          }
3348    
# Line 3269  for (;;) Line 3352  for (;;)
3352          {          {
3353          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3354            {            {
3355  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3356            if (utf8)            if (utf)
3357              {              {
3358              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3359                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3360                {                {
3361                current_subject++;                current_subject++;
3362                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3363                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
                 current_subject++;  
3364                }                }
3365              }              }
3366            else            else
# Line 3305  for (;;) Line 3387  for (;;)
3387          while (current_subject < end_subject)          while (current_subject < end_subject)
3388            {            {
3389            register unsigned int c = *current_subject;            register unsigned int c = *current_subject;
3390    #ifndef COMPILE_PCRE8
3391              if (c > 255) c = 255;
3392    #endif
3393            if ((start_bits[c/8] & (1 << (c&7))) == 0)            if ((start_bits[c/8] & (1 << (c&7))) == 0)
3394              {              {
3395              current_subject++;              current_subject++;
3396  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3397              if (utf8)              /* In non 8-bit mode, the iteration will stop for
3398                while(current_subject < end_subject &&              characters > 255 at the beginning or not stop at all. */
3399                      (*current_subject & 0xc0) == 0x80) current_subject++;              if (utf)
3400                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3401                    current_subject++);
3402  #endif  #endif
3403              }              }
3404            else break;            else break;
# Line 3327  for (;;) Line 3414  for (;;)
3414      disabling is explicitly requested (and of course, by the test above, this      disabling is explicitly requested (and of course, by the test above, this
3415      code is not obeyed when restarting after a partial match). */      code is not obeyed when restarting after a partial match). */
3416    
3417      if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3418          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3419        {        {
3420        /* If the pattern was studied, a minimum subject length may be set. This        /* If the pattern was studied, a minimum subject length may be set. This
# Line 3339  for (;;) Line 3426  for (;;)
3426            (pcre_uint32)(end_subject - current_subject) < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3427          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3428    
3429        /* If req_byte is set, we know that that character must appear in the        /* If req_char is set, we know that that character must appear in the
3430        subject for the match to succeed. If the first character is set, req_byte        subject for the match to succeed. If the first character is set, req_char
3431        must be later in the subject; otherwise the test starts at the match        must be later in the subject; otherwise the test starts at the match
3432        point. This optimization can save a huge amount of work in patterns with        point. This optimization can save a huge amount of work in patterns with
3433        nested unlimited repeats that aren't going to match. Writing separate        nested unlimited repeats that aren't going to match. Writing separate
# Line 3352  for (;;) Line 3439  for (;;)
3439        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3440        string... so we don't do this when the string is sufficiently long. */        string... so we don't do this when the string is sufficiently long. */
3441    
3442        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3443          {          {
3444          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3445    
3446          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3447          place we found it at last time. */          place we found it at last time. */
3448    
3449          if (p > req_byte_ptr)          if (p > req_char_ptr)
3450            {            {
3451            if (req_byte_caseless)            if (req_char != req_char2)
3452              {              {
3453              while (p < end_subject)              while (p < end_subject)
3454                {                {
3455                register int pp = *p++;                register int pp = *p++;
3456                if (pp == req_byte || pp == req_byte2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3457                }                }
3458              }              }
3459            else            else
3460              {              {
3461              while (p < end_subject)              while (p < end_subject)
3462                {                {
3463                if (*p++ == req_byte) { p--; break; }                if (*p++ == req_char) { p--; break; }
3464                }                }
3465              }              }
3466    
# Line 3386  for (;;) Line 3473  for (;;)
3473            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3474            the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3475    
3476            req_byte_ptr = p;            req_char_ptr = p;
3477            }            }
3478          }          }
3479        }        }
# Line 3418  for (;;) Line 3505  for (;;)
3505    
3506    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3507    current_subject++;    current_subject++;
3508    if (utf8)  #ifdef SUPPORT_UTF
3509      if (utf)
3510      {      {
3511      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3512        current_subject++;        current_subject++);
3513      }      }
3514    #endif
3515    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3516    
3517    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does

Legend:
Removed from v.654  
changed lines
  Added in v.916

  ViewVC Help
Powered by ViewVC 1.1.5