/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 680 by ph10, Tue Sep 6 09:15:54 2011 UTC | revision 852 by zherczeg, Thu Jan 5 19:18:12 2012 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 113  small value. Non-zero values in the tabl Line 113  small value. Non-zero values in the tabl
113  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
114  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
115    
116  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
117    0,                             /* End                                    */    0,                             /* End                                    */
118    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
119    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 128  static const uschar coptable[] = { Line 128  static const uschar coptable[] = {
128    1,                             /* noti                                   */    1,                             /* noti                                   */
129    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
130    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
131    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
132    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
133      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
134    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */    1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
135    3, 3, 3,                       /* upto I, minupto I, exact I             */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
136    1, 1, 1, 3,                    /* *+I, ++I, ?+I, upto+I                  */    1+IMM2_SIZE,                   /* exact I                                */
137      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
138    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
139    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
140    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
141    1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
142      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
143    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */    1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
144    3, 3, 3,                       /* NOT upto I, minupto I, exact I         */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
145    1, 1, 1, 3,                    /* NOT *+I, ++I, ?+I, upto+I              */    1+IMM2_SIZE,                   /* NOT exact I                            */
146      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
147    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
148    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
149    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
150    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
151      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
152    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
153    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
154    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 164  static const uschar coptable[] = { Line 169  static const uschar coptable[] = {
169    0,                             /* Assert not                             */    0,                             /* Assert not                             */
170    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
171    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
172    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */    0, 0,                          /* ONCE, ONCE_NC                          */
173      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
174    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
175    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
176    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
# Line 181  remember the fact that a character could Line 187  remember the fact that a character could
187  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
188  two tables that follow must also be modified. */  two tables that follow must also be modified. */
189    
190  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
191    0,                             /* End                                    */    0,                             /* End                                    */
192    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
193    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
# Line 232  static const uschar poptable[] = { Line 238  static const uschar poptable[] = {
238    0,                             /* Assert not                             */    0,                             /* Assert not                             */
239    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
240    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
241    0, 0, 0, 0, 0, 0,              /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */    0, 0,                          /* ONCE, ONCE_NC                          */
242      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
243    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
244    0, 0,                          /* CREF, NCREF                            */    0, 0,                          /* CREF, NCREF                            */
245    0, 0,                          /* RREF, NRREF                            */    0, 0,                          /* RREF, NRREF                            */
# Line 247  static const uschar poptable[] = { Line 254  static const uschar poptable[] = {
254  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
255  and \w */  and \w */
256    
257  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
258    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
259    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
260    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 255  static const uschar toptable1[] = { Line 262  static const uschar toptable1[] = {
262    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
263  };  };
264    
265  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
266    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
267    ctype_digit, 0,    ctype_digit, 0,
268    ctype_space, 0,    ctype_space, 0,
# Line 294  Returns:       nothing Line 301  Returns:       nothing
301  */  */
302    
303  static void  static void
304  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
305  {  {
306  int c;  int c;
307  while (length-- > 0)  while (length-- > 0)
# Line 384  for the current character, one for the f Line 391  for the current character, one for the f
391  static int  static int
392  internal_dfa_exec(  internal_dfa_exec(
393    dfa_match_data *md,    dfa_match_data *md,
394    const uschar *this_start_code,    const pcre_uchar *this_start_code,
395    const uschar *current_subject,    const pcre_uchar *current_subject,
396    int start_offset,    int start_offset,
397    int *offsets,    int *offsets,
398    int offsetcount,    int offsetcount,
# Line 396  internal_dfa_exec( Line 403  internal_dfa_exec(
403  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
404  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
405    
406  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
407  const uschar *ptr;  const pcre_uchar *ptr;
408  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
409    
410  dfa_recursion_info new_recursive;  dfa_recursion_info new_recursive;
411    
# Line 407  int active_count, new_count, match_count Line 414  int active_count, new_count, match_count
414  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
415  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
416    
417  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
418  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
419  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
420    
421  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
422  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
423  #else  #else
424  BOOL utf8 = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426    
427  rlevel++;  rlevel++;
# Line 440  new_count = 0; Line 447  new_count = 0;
447    
448  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
449    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
450      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0);      *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
451        ? IMM2_SIZE:0);
452    
453  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
454  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 468  if (*first_op == OP_REVERSE) Line 476  if (*first_op == OP_REVERSE)
476    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
477    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
478    
479  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
480    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
481    
482    if (utf8)    if (utf)
483      {      {
484      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
485        {        {
486        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
487        current_subject--;        current_subject--;
488        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
              (*current_subject & 0xc0) == 0x80)  
         current_subject--;  
489        }        }
490      }      }
491    else    else
# Line 540  else Line 546  else
546      {      {
547      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
548        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
549          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
550          2:0);          ? IMM2_SIZE:0);
551      do      do
552        {        {
553        ADD_NEW((int)(end_code - start_code + length), 0);        ADD_NEW((int)(end_code - start_code + length), 0);
# Line 554  else Line 560  else
560    
561  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
562    
563  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
564    
565  /* Loop for scanning the subject */  /* Loop for scanning the subject */
566    
# Line 581  for (;;) Line 587  for (;;)
587    
588  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
589    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
590    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
591    printf("\"\n");    printf("\"\n");
592    
593    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 602  for (;;) Line 608  for (;;)
608    if (ptr < end_subject)    if (ptr < end_subject)
609      {      {
610      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of bytes in the character */
611  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
612      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
613  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
614      c = *ptr;      c = *ptr;
615      }      }
616    else    else
# Line 622  for (;;) Line 628  for (;;)
628      {      {
629      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
630      BOOL caseless = FALSE;      BOOL caseless = FALSE;
631      const uschar *code;      const pcre_uchar *code;
632      int state_offset = current_state->offset;      int state_offset = current_state->offset;
633      int count, codevalue, rrc;      int count, codevalue, rrc;
634    
# Line 691  for (;;) Line 697  for (;;)
697      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
698        {        {
699        dlen = 1;        dlen = 1;
700  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
701        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
702  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
703        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
704        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
705          {          {
# Line 814  for (;;) Line 820  for (;;)
820        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
821        case OP_CBRA:        case OP_CBRA:
822        case OP_SCBRA:        case OP_SCBRA:
823        ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE),  0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
824        code += GET(code, 1);        code += GET(code, 1);
825        while (*code == OP_ALT)        while (*code == OP_ALT)
826          {          {
# Line 954  for (;;) Line 960  for (;;)
960    
961          if (ptr > start_subject)          if (ptr > start_subject)
962            {            {
963            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
964            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
965  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
966            if (utf8) BACKCHAR(temp);            if (utf) { BACKCHAR(temp); }
967  #endif  #endif
968            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
969  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1022  for (;;) Line 1028  for (;;)
1028            break;            break;
1029    
1030            case PT_GC:            case PT_GC:
1031            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1032            break;            break;
1033    
1034            case PT_PC:            case PT_PC:
# Line 1036  for (;;) Line 1042  for (;;)
1042            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1043    
1044            case PT_ALNUM:            case PT_ALNUM:
1045            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1046                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1047            break;            break;
1048    
1049            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1050            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1051                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1052            break;            break;
1053    
1054            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1055            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1056                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1057                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1058            break;            break;
1059    
1060            case PT_WORD:            case PT_WORD:
1061            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1062                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1063                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1064            break;            break;
1065    
# Line 1155  for (;;) Line 1161  for (;;)
1161                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1162            {            {
1163            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1164              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1165            else            else
1166              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1167            }            }
# Line 1166  for (;;) Line 1172  for (;;)
1172        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1173        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1174        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1175        ADD_ACTIVE(state_offset + 4, 0);        ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1176        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1177        if (clen > 0)        if (clen > 0)
1178          {          {
# Line 1181  for (;;) Line 1187  for (;;)
1187              next_active_state--;              next_active_state--;
1188              }              }
1189            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1190              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1191            else            else
1192              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1193            }            }
# Line 1216  for (;;) Line 1222  for (;;)
1222            break;            break;
1223    
1224            case PT_GC:            case PT_GC:
1225            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1226            break;            break;
1227    
1228            case PT_PC:            case PT_PC:
# Line 1230  for (;;) Line 1236  for (;;)
1236            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1237    
1238            case PT_ALNUM:            case PT_ALNUM:
1239            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1240                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1241            break;            break;
1242    
1243            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1244            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1245                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1246            break;            break;
1247    
1248            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1249            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1250                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1251                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1252            break;            break;
1253    
1254            case PT_WORD:            case PT_WORD:
1255            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1256                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1257                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1258            break;            break;
1259    
# Line 1279  for (;;) Line 1285  for (;;)
1285        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1286        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1287          {          {
1288          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1289          int ncount = 0;          int ncount = 0;
1290          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1291            {            {
# Line 1463  for (;;) Line 1469  for (;;)
1469            break;            break;
1470    
1471            case PT_GC:            case PT_GC:
1472            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1473            break;            break;
1474    
1475            case PT_PC:            case PT_PC:
# Line 1477  for (;;) Line 1483  for (;;)
1483            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1484    
1485            case PT_ALNUM:            case PT_ALNUM:
1486            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1487                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1488            break;            break;
1489    
1490            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1491            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1492                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1493            break;            break;
1494    
1495            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1496            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1497                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1498                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1499            break;            break;
1500    
1501            case PT_WORD:            case PT_WORD:
1502            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1503                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1504                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1505            break;            break;
1506    
# Line 1535  for (;;) Line 1541  for (;;)
1541        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1542        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1543          {          {
1544          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1545          int ncount = 0;          int ncount = 0;
1546          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1547              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1717  for (;;) Line 1723  for (;;)
1723        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1724        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1725        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1726          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1727        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1728        if (clen > 0)        if (clen > 0)
1729          {          {
1730          BOOL OK;          BOOL OK;
1731          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1732          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1733            {            {
1734            case PT_ANY:            case PT_ANY:
1735            OK = TRUE;            OK = TRUE;
# Line 1735  for (;;) Line 1741  for (;;)
1741            break;            break;
1742    
1743            case PT_GC:            case PT_GC:
1744            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1745            break;            break;
1746    
1747            case PT_PC:            case PT_PC:
1748            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1749            break;            break;
1750    
1751            case PT_SC:            case PT_SC:
1752            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1753            break;            break;
1754    
1755            /* These are specials for combination cases. */            /* These are specials for combination cases. */
1756    
1757            case PT_ALNUM:            case PT_ALNUM:
1758            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1759                 _pcre_ucp_gentype[prop->chartype] == ucp_N;                 PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1760            break;            break;
1761    
1762            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
1763            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1764                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1765            break;            break;
1766    
1767            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
1768            OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1769                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1770                 c == CHAR_FF || c == CHAR_CR;                 c == CHAR_FF || c == CHAR_CR;
1771            break;            break;
1772    
1773            case PT_WORD:            case PT_WORD:
1774            OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||            OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1775                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||                 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1776                 c == CHAR_UNDERSCORE;                 c == CHAR_UNDERSCORE;
1777            break;            break;
1778    
# Line 1785  for (;;) Line 1791  for (;;)
1791              next_active_state--;              next_active_state--;
1792              }              }
1793            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1794              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1795            else            else
1796              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1797            }            }
# Line 1798  for (;;) Line 1804  for (;;)
1804        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1805        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1806        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1807          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1808        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1809        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1810          {          {
1811          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1812          int ncount = 0;          int ncount = 0;
1813          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1814            {            {
# Line 1819  for (;;) Line 1825  for (;;)
1825            nptr += ndlen;            nptr += ndlen;
1826            }            }
1827          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1828            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1829          else          else
1830            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1831          }          }
# Line 1832  for (;;) Line 1838  for (;;)
1838        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1839        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1840        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1841          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1842        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1843        if (clen > 0)        if (clen > 0)
1844          {          {
# Line 1859  for (;;) Line 1865  for (;;)
1865              next_active_state--;              next_active_state--;
1866              }              }
1867            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1868              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1869            else            else
1870              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1871            break;            break;
# Line 1876  for (;;) Line 1882  for (;;)
1882        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1883        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1884        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1885          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1886        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1887        if (clen > 0)        if (clen > 0)
1888          {          {
# Line 1905  for (;;) Line 1911  for (;;)
1911              next_active_state--;              next_active_state--;
1912              }              }
1913            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1914              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1915            else            else
1916              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
1917            }            }
# Line 1918  for (;;) Line 1924  for (;;)
1924        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1925        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1926        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1927          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1928        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1929        if (clen > 0)        if (clen > 0)
1930          {          {
# Line 1960  for (;;) Line 1966  for (;;)
1966              next_active_state--;              next_active_state--;
1967              }              }
1968            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1969              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1970            else            else
1971              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
1972            }            }
# Line 1982  for (;;) Line 1988  for (;;)
1988        case OP_CHARI:        case OP_CHARI:
1989        if (clen == 0) break;        if (clen == 0) break;
1990    
1991  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1992        if (utf8)        if (utf)
1993          {          {
1994          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1995            {            {
1996            unsigned int othercase;            unsigned int othercase;
1997            if (c < 128) othercase = fcc[c]; else            if (c < 128)
1998                othercase = fcc[c];
1999            /* If we have Unicode property support, we can use it to test the            else
2000            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2001                other case of the character. */
2002  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2003            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2004  #else  #else
2005            othercase = NOTACHAR;              othercase = NOTACHAR;
2006  #endif  #endif
2007    
2008            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2009            }            }
2010          }          }
2011        else        else
2012  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2013          /* Not UTF mode */
       /* Non-UTF-8 mode */  
2014          {          {
2015          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2016              { ADD_NEW(state_offset + 2, 0); }
2017          }          }
2018        break;        break;
2019    
# Line 2021  for (;;) Line 2027  for (;;)
2027        case OP_EXTUNI:        case OP_EXTUNI:
2028        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
2029          {          {
2030          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2031          int ncount = 0;          int ncount = 0;
2032          while (nptr < end_subject)          while (nptr < end_subject)
2033            {            {
# Line 2207  for (;;) Line 2213  for (;;)
2213          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2214          if (caseless)          if (caseless)
2215            {            {
2216  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2217            if (utf8 && d >= 128)            if (utf && d >= 128)
2218              {              {
2219  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2220              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2221  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2222              }              }
2223            else            else
2224  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2225            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2226            }            }
2227          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2228            {            {
# Line 2254  for (;;) Line 2260  for (;;)
2260          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2261          if (caseless)          if (caseless)
2262            {            {
2263  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2264            if (utf8 && d >= 128)            if (utf && d >= 128)
2265              {              {
2266  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2267              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2268  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2269              }              }
2270            else            else
2271  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2272            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2273            }            }
2274          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2275            {            {
# Line 2299  for (;;) Line 2305  for (;;)
2305          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2306          if (caseless)          if (caseless)
2307            {            {
2308  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2309            if (utf8 && d >= 128)            if (utf && d >= 128)
2310              {              {
2311  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2312              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2313  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2314              }              }
2315            else            else
2316  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2317            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2318            }            }
2319          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2320            {            {
# Line 2336  for (;;) Line 2342  for (;;)
2342          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2343          if (caseless)          if (caseless)
2344            {            {
2345  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2346            if (utf8 && d >= 128)            if (utf && d >= 128)
2347              {              {
2348  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2349              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2350  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2351              }              }
2352            else            else
2353  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2354            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2355            }            }
2356          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2357            {            {
2358            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2359              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2360            else            else
2361              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2362            }            }
# Line 2373  for (;;) Line 2379  for (;;)
2379        case OP_NOTUPTO:        case OP_NOTUPTO:
2380        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2381        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2382        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2383        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2384        if (clen > 0)        if (clen > 0)
2385          {          {
2386          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2387          if (caseless)          if (caseless)
2388            {            {
2389  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2390            if (utf8 && d >= 128)            if (utf && d >= 128)
2391              {              {
2392  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2393              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2394  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2395              }              }
2396            else            else
2397  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2398            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2399            }            }
2400          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2401            {            {
# Line 2399  for (;;) Line 2405  for (;;)
2405              next_active_state--;              next_active_state--;
2406              }              }
2407            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2408              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2409            else            else
2410              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2411            }            }
# Line 2416  for (;;) Line 2422  for (;;)
2422          {          {
2423          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2424          int next_state_offset;          int next_state_offset;
2425          const uschar *ecode;          const pcre_uchar *ecode;
2426    
2427          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2428          can set isinclass from it. */          can set isinclass from it. */
2429    
2430          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2431            {            {
2432            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2433            if (clen > 0)            if (clen > 0)
2434              {              {
2435              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2436                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2437              }              }
2438            }            }
2439    
# Line 2438  for (;;) Line 2444  for (;;)
2444          else          else
2445           {           {
2446           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2447           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2448           }           }
2449    
2450          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
# Line 2472  for (;;) Line 2478  for (;;)
2478            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2479            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2480            if (count >= GET2(ecode, 1))            if (count >= GET2(ecode, 1))
2481              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2482            if (isinclass)            if (isinclass)
2483              {              {
2484              int max = GET2(ecode, 3);              int max = GET2(ecode, 1 + IMM2_SIZE);
2485              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2486                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2487              else              else
2488                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
2489              }              }
# Line 2508  for (;;) Line 2514  for (;;)
2514          int rc;          int rc;
2515          int local_offsets[2];          int local_offsets[2];
2516          int local_workspace[1000];          int local_workspace[1000];
2517          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2518    
2519          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2520    
# Line 2545  for (;;) Line 2551  for (;;)
2551          if (code[LINK_SIZE+1] == OP_CALLOUT)          if (code[LINK_SIZE+1] == OP_CALLOUT)
2552            {            {
2553            rrc = 0;            rrc = 0;
2554            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2555              {              {
2556              pcre_callout_block cb;              PUBL(callout_block) cb;
2557              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2558              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2559              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2560    #ifdef COMPILE_PCRE8
2561              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2562    #else
2563                cb.subject          = (PCRE_SPTR16)start_subject;
2564    #endif
2565              cb.subject_length   = (int)(end_subject - start_subject);              cb.subject_length   = (int)(end_subject - start_subject);
2566              cb.start_match      = (int)(current_subject - start_subject);              cb.start_match      = (int)(current_subject - start_subject);
2567              cb.current_position = (int)(ptr - start_subject);              cb.current_position = (int)(ptr - start_subject);
# Line 2561  for (;;) Line 2571  for (;;)
2571              cb.capture_last     = -1;              cb.capture_last     = -1;
2572              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2573              cb.mark             = NULL;   /* No (*MARK) support */              cb.mark             = NULL;   /* No (*MARK) support */
2574              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2575              }              }
2576            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2577            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2578            }            }
2579    
2580          condcode = code[LINK_SIZE+1];          condcode = code[LINK_SIZE+1];
# Line 2585  for (;;) Line 2595  for (;;)
2595    
2596          else if (condcode == OP_RREF || condcode == OP_NRREF)          else if (condcode == OP_RREF || condcode == OP_NRREF)
2597            {            {
2598            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2599            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2600            if (md->recursive != NULL)            if (md->recursive != NULL)
2601              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2602            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2603            }            }
2604    
# Line 2597  for (;;) Line 2607  for (;;)
2607          else          else
2608            {            {
2609            int rc;            int rc;
2610            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2611            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2612    
2613            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2614    
# Line 2629  for (;;) Line 2639  for (;;)
2639          dfa_recursion_info *ri;          dfa_recursion_info *ri;
2640          int local_offsets[1000];          int local_offsets[1000];
2641          int local_workspace[1000];          int local_workspace[1000];
2642          const uschar *callpat = start_code + GET(code, 1);          const pcre_uchar *callpat = start_code + GET(code, 1);
2643          int recno = (callpat == md->start_code)? 0 :          int recno = (callpat == md->start_code)? 0 :
2644            GET2(callpat, 1 + LINK_SIZE);            GET2(callpat, 1 + LINK_SIZE);
2645          int rc;          int rc;
# Line 2680  for (;;) Line 2690  for (;;)
2690            {            {
2691            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2692              {              {
2693              const uschar *p = start_subject + local_offsets[rc];              const pcre_uchar *p = start_subject + local_offsets[rc];
2694              const uschar *pp = start_subject + local_offsets[rc+1];              const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2695              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2696              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2697                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2698    #endif
2699              if (charcount > 0)              if (charcount > 0)
2700                {                {
2701                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
# Line 2706  for (;;) Line 2718  for (;;)
2718        case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
2719          {          {
2720          int charcount, matched_count;          int charcount, matched_count;
2721          const uschar *local_ptr = ptr;          const pcre_uchar *local_ptr = ptr;
2722          BOOL allow_zero;          BOOL allow_zero;
2723    
2724          if (codevalue == OP_BRAPOSZERO)          if (codevalue == OP_BRAPOSZERO)
# Line 2756  for (;;) Line 2768  for (;;)
2768    
2769          if (matched_count > 0 || allow_zero)          if (matched_count > 0 || allow_zero)
2770            {            {
2771            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2772            int next_state_offset;            int next_state_offset;
2773    
2774            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
# Line 2777  for (;;) Line 2789  for (;;)
2789              }              }
2790            else            else
2791              {              {
2792              const uschar *p = ptr;              const pcre_uchar *p = ptr;
2793              const uschar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2794              charcount = pp - p;              charcount = (int)(pp - p);
2795              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2796                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2797    #endif
2798              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2799              }              }
2800            }            }
# Line 2789  for (;;) Line 2803  for (;;)
2803    
2804        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2805        case OP_ONCE:        case OP_ONCE:
2806          case OP_ONCE_NC:
2807          {          {
2808          int local_offsets[2];          int local_offsets[2];
2809          int local_workspace[1000];          int local_workspace[1000];
# Line 2806  for (;;) Line 2821  for (;;)
2821    
2822          if (rc >= 0)          if (rc >= 0)
2823            {            {
2824            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2825            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2826            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2827    
# Line 2859  for (;;) Line 2874  for (;;)
2874              }              }
2875            else            else
2876              {              {
2877              const uschar *p = start_subject + local_offsets[0];  #ifdef SUPPORT_UTF
2878              const uschar *pp = start_subject + local_offsets[1];              const pcre_uchar *p = start_subject + local_offsets[0];
2879              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;              const pcre_uchar *pp = start_subject + local_offsets[1];
2880                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2881    #endif
2882              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2883              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
2884                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
# Line 2877  for (;;) Line 2894  for (;;)
2894    
2895        case OP_CALLOUT:        case OP_CALLOUT:
2896        rrc = 0;        rrc = 0;
2897        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
2898          {          {
2899          pcre_callout_block cb;          PUBL(callout_block) cb;
2900          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2901          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2902          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2903    #ifdef COMPILE_PCRE8
2904          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2905    #else
2906            cb.subject          = (PCRE_SPTR16)start_subject;
2907    #endif
2908          cb.subject_length   = (int)(end_subject - start_subject);          cb.subject_length   = (int)(end_subject - start_subject);
2909          cb.start_match      = (int)(current_subject - start_subject);          cb.start_match      = (int)(current_subject - start_subject);
2910          cb.current_position = (int)(ptr - start_subject);          cb.current_position = (int)(ptr - start_subject);
# Line 2893  for (;;) Line 2914  for (;;)
2914          cb.capture_last     = -1;          cb.capture_last     = -1;
2915          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2916          cb.mark             = NULL;   /* No (*MARK) support */          cb.mark             = NULL;   /* No (*MARK) support */
2917          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2918          }          }
2919        if (rrc == 0)        if (rrc == 0)
2920          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
2921        break;        break;
2922    
2923    
# Line 2993  Returns:          > 0 => number of match Line 3014  Returns:          > 0 => number of match
3014                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3015  */  */
3016    
3017    #ifdef COMPILE_PCRE8
3018  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3019  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3020    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3021    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3022    #else
3023    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3024    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3025      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3026      int offsetcount, int *workspace, int wscount)
3027    #endif
3028  {  {
3029  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3030  dfa_match_data match_block;  dfa_match_data match_block;
3031  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3032  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3033  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;
3034    const pcre_uint8 *lcc;
3035    
 pcre_study_data internal_study;  
3036  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
 real_pcre internal_re;  
3037    
3038  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3039  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3040  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3041  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3042  int first_byte = -1;  pcre_uchar first_char = 0;
3043  int req_byte = -1;  pcre_uchar first_char2 = 0;
3044  int req_byte2 = -1;  pcre_uchar req_char = 0;
3045    pcre_uchar req_char2 = 0;
3046  int newline;  int newline;
3047    
3048  /* Plausibility checks */  /* Plausibility checks */
# Line 3049  if (extra_data != NULL) Line 3077  if (extra_data != NULL)
3077    }    }
3078    
3079  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
3080  test for a regex that was compiled on a host of opposite endianness. If this is  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3081  the case, flipped values are put in internal_re and internal_study if there was  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3082  study data too. */  means that the pattern is likely compiled with different endianness. */
3083    
3084  if (re->magic_number != MAGIC_NUMBER)  if (re->magic_number != MAGIC_NUMBER)
3085    {    return re->magic_number == REVERSED_MAGIC_NUMBER?
3086    re = _pcre_try_flipped(re, &internal_re, study, &internal_study);      PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3087    if (re == NULL) return PCRE_ERROR_BADMAGIC;  if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
   if (study != NULL) study = &internal_study;  
   }  
3088    
3089  /* Set some local values */  /* Set some local values */
3090    
3091  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3092  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3093  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3094    
3095  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3096  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3097    utf = (re->options & PCRE_UTF8) != 0;
3098  #else  #else
3099  utf8 = FALSE;  utf = FALSE;
3100  #endif  #endif
3101    
3102  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 3077  anchored = (options & (PCRE_ANCHORED|PCR Line 3104  anchored = (options & (PCRE_ANCHORED|PCR
3104    
3105  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3106    
3107  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3108      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3109  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3110  md->end_subject = end_subject;  md->end_subject = end_subject;
3111  md->start_offset = start_offset;  md->start_offset = start_offset;
3112  md->moptions = options;  md->moptions = options;
# Line 3140  else Line 3167  else
3167  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3168  back the character offset. */  back the character offset. */
3169    
3170  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3171  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3172    {    {
3173    int erroroffset;    int erroroffset;
3174    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3175    if (errorcode != 0)    if (errorcode != 0)
3176      {      {
3177      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 3156  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3183  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3183        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3184      }      }
3185    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3186          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)          NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3187      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
3188    }    }
3189  #endif  #endif
# Line 3165  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3192  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3192  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3193  in other programs later. */  in other programs later. */
3194    
3195  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3196    
3197  /* The lower casing table and the "must be at the start of a line" flag are  /* The lower casing table and the "must be at the start of a line" flag are
3198  used in a loop when finding where to start. */  used in a loop when finding where to start. */
# Line 3184  if (!anchored) Line 3211  if (!anchored)
3211    {    {
3212    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3213      {      {
3214      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3215      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = re->first_char;
3216        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3217          {
3218          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3219    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3220          if (utf && first_char > 127)
3221            first_char2 = UCD_OTHERCASE(first_char);
3222    #endif
3223          }
3224      }      }
3225    else    else
3226      {      {
# Line 3201  character" set. */ Line 3235  character" set. */
3235    
3236  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3237    {    {
3238    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3239    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = re->req_char;
3240    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3241        {
3242        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3243    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3244        if (utf && req_char > 127)
3245          req_char2 = UCD_OTHERCASE(req_char);
3246    #endif
3247        }
3248    }    }
3249    
3250  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 3216  for (;;) Line 3257  for (;;)
3257    
3258    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3259      {      {
3260      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3261    
3262      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3263      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 3225  for (;;) Line 3266  for (;;)
3266    
3267      if (firstline)      if (firstline)
3268        {        {
3269        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3270  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3271        if (utf8)        if (utf)
3272          {          {
3273          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3274            {            {
3275            t++;            t++;
3276            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3277            }            }
3278          }          }
3279        else        else
# Line 3249  for (;;) Line 3290  for (;;)
3290    
3291      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3292        {        {
3293        /* Advance to a known first byte. */        /* Advance to a known first char. */
3294    
3295        if (first_byte >= 0)        if (has_first_char)
3296          {          {
3297          if (first_byte_caseless)          if (first_char != first_char2)
3298            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3299                   lcc[*current_subject] != first_byte)                *current_subject != first_char && *current_subject != first_char2)
3300              current_subject++;              current_subject++;
3301          else          else
3302            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3303                   *current_subject != first_byte)                   *current_subject != first_char)
3304              current_subject++;              current_subject++;
3305          }          }
3306    
# Line 3269  for (;;) Line 3310  for (;;)
3310          {          {
3311          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3312            {            {
3313  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3314            if (utf8)            if (utf)
3315              {              {
3316              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3317                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3318                {                {
3319                current_subject++;                current_subject++;
3320                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3321                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
                 current_subject++;  
3322                }                }
3323              }              }
3324            else            else
# Line 3305  for (;;) Line 3345  for (;;)
3345          while (current_subject < end_subject)          while (current_subject < end_subject)
3346            {            {
3347            register unsigned int c = *current_subject;            register unsigned int c = *current_subject;
3348    #ifndef COMPILE_PCRE8
3349              if (c > 255) c = 255;
3350    #endif
3351            if ((start_bits[c/8] & (1 << (c&7))) == 0)            if ((start_bits[c/8] & (1 << (c&7))) == 0)
3352              {              {
3353              current_subject++;              current_subject++;
3354  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3355              if (utf8)              /* In non 8-bit mode, the iteration will stop for
3356                while(current_subject < end_subject &&              characters > 255 at the beginning or not stop at all. */
3357                      (*current_subject & 0xc0) == 0x80) current_subject++;              if (utf)
3358                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3359                    current_subject++);
3360  #endif  #endif
3361              }              }
3362            else break;            else break;
# Line 3327  for (;;) Line 3372  for (;;)
3372      disabling is explicitly requested (and of course, by the test above, this      disabling is explicitly requested (and of course, by the test above, this
3373      code is not obeyed when restarting after a partial match). */      code is not obeyed when restarting after a partial match). */
3374    
3375      if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3376          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3377        {        {
3378        /* If the pattern was studied, a minimum subject length may be set. This        /* If the pattern was studied, a minimum subject length may be set. This
# Line 3339  for (;;) Line 3384  for (;;)
3384            (pcre_uint32)(end_subject - current_subject) < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3385          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3386    
3387        /* If req_byte is set, we know that that character must appear in the        /* If req_char is set, we know that that character must appear in the
3388        subject for the match to succeed. If the first character is set, req_byte        subject for the match to succeed. If the first character is set, req_char
3389        must be later in the subject; otherwise the test starts at the match        must be later in the subject; otherwise the test starts at the match
3390        point. This optimization can save a huge amount of work in patterns with        point. This optimization can save a huge amount of work in patterns with
3391        nested unlimited repeats that aren't going to match. Writing separate        nested unlimited repeats that aren't going to match. Writing separate
# Line 3352  for (;;) Line 3397  for (;;)
3397        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3398        string... so we don't do this when the string is sufficiently long. */        string... so we don't do this when the string is sufficiently long. */
3399    
3400        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3401          {          {
3402          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3403    
3404          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3405          place we found it at last time. */          place we found it at last time. */
3406    
3407          if (p > req_byte_ptr)          if (p > req_char_ptr)
3408            {            {
3409            if (req_byte_caseless)            if (req_char != req_char2)
3410              {              {
3411              while (p < end_subject)              while (p < end_subject)
3412                {                {
3413                register int pp = *p++;                register int pp = *p++;
3414                if (pp == req_byte || pp == req_byte2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3415                }                }
3416              }              }
3417            else            else
3418              {              {
3419              while (p < end_subject)              while (p < end_subject)
3420                {                {
3421                if (*p++ == req_byte) { p--; break; }                if (*p++ == req_char) { p--; break; }
3422                }                }
3423              }              }
3424    
# Line 3386  for (;;) Line 3431  for (;;)
3431            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3432            the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3433    
3434            req_byte_ptr = p;            req_char_ptr = p;
3435            }            }
3436          }          }
3437        }        }
# Line 3418  for (;;) Line 3463  for (;;)
3463    
3464    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3465    current_subject++;    current_subject++;
3466    if (utf8)  #ifdef SUPPORT_UTF
3467      if (utf)
3468      {      {
3469      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3470        current_subject++;        current_subject++);
3471      }      }
3472    #endif
3473    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3474    
3475    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does

Legend:
Removed from v.680  
changed lines
  Added in v.852

  ViewVC Help
Powered by ViewVC 1.1.5