/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1365 by ph10, Sun Oct 6 18:33:56 2013 UTC revision 1381 by zherczeg, Wed Oct 16 06:23:00 2013 UTC
# Line 462  static const char error_texts[] = Line 462  static const char error_texts[] =
462    "POSIX collating elements are not supported\0"    "POSIX collating elements are not supported\0"
463    "this version of PCRE is compiled without UTF support\0"    "this version of PCRE is compiled without UTF support\0"
464    "spare error\0"  /** DEAD **/    "spare error\0"  /** DEAD **/
465    "character value in \\x{...} sequence is too large\0"    "character value in \\x{} or \\o{} is too large\0"
466    /* 35 */    /* 35 */
467    "invalid condition (?(0)\0"    "invalid condition (?(0)\0"
468    "\\C not allowed in lookbehind assertion\0"    "\\C not allowed in lookbehind assertion\0"
# Line 516  static const char error_texts[] = Line 516  static const char error_texts[] =
516    "character value in \\u.... sequence is too large\0"    "character value in \\u.... sequence is too large\0"
517    "invalid UTF-32 string\0"    "invalid UTF-32 string\0"
518    "setting UTF is disabled by the application\0"    "setting UTF is disabled by the application\0"
519      "non-hex character in \\x{} (closing brace missing?)\0"
520      /* 80 */
521      "non-octal character in \\o{} (closing brace missing?)\0"
522      "missing opening brace after \\o\0"
523    ;    ;
524    
525  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 773  static const pcre_uint8 posspropstab[3][ Line 777  static const pcre_uint8 posspropstab[3][
777    { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */    { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
778  };  };
779    
780    /* This table is used when converting repeating opcodes into possessified
781    versions as a result of an explicit possessive quantifier such as ++. A zero
782    value means there is no possessified version - in those cases the item in
783    question must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT
784    because all relevant opcodes are less than that. */
785    
786    static const pcre_uint8 opcode_possessify[] = {
787      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0 - 15  */
788      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 16 - 31 */
789    
790      0,                       /* NOTI */
791      OP_POSSTAR, 0,           /* STAR, MINSTAR */
792      OP_POSPLUS, 0,           /* PLUS, MINPLUS */
793      OP_POSQUERY, 0,          /* QUERY, MINQUERY */
794      OP_POSUPTO, 0,           /* UPTO, MINUPTO */
795      0,                       /* EXACT */
796      0, 0, 0, 0,              /* POS{STAR,PLUS,QUERY,UPTO} */
797    
798      OP_POSSTARI, 0,          /* STARI, MINSTARI */
799      OP_POSPLUSI, 0,          /* PLUSI, MINPLUSI */
800      OP_POSQUERYI, 0,         /* QUERYI, MINQUERYI */
801      OP_POSUPTOI, 0,          /* UPTOI, MINUPTOI */
802      0,                       /* EXACTI */
803      0, 0, 0, 0,              /* POS{STARI,PLUSI,QUERYI,UPTOI} */
804    
805      OP_NOTPOSSTAR, 0,        /* NOTSTAR, NOTMINSTAR */
806      OP_NOTPOSPLUS, 0,        /* NOTPLUS, NOTMINPLUS */
807      OP_NOTPOSQUERY, 0,       /* NOTQUERY, NOTMINQUERY */
808      OP_NOTPOSUPTO, 0,        /* NOTUPTO, NOTMINUPTO */
809      0,                       /* NOTEXACT */
810      0, 0, 0, 0,              /* NOTPOS{STAR,PLUS,QUERY,UPTO} */
811    
812      OP_NOTPOSSTARI, 0,       /* NOTSTARI, NOTMINSTARI */
813      OP_NOTPOSPLUSI, 0,       /* NOTPLUSI, NOTMINPLUSI */
814      OP_NOTPOSQUERYI, 0,      /* NOTQUERYI, NOTMINQUERYI */
815      OP_NOTPOSUPTOI, 0,       /* NOTUPTOI, NOTMINUPTOI */
816      0,                       /* NOTEXACTI */
817      0, 0, 0, 0,              /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */
818    
819      OP_TYPEPOSSTAR, 0,       /* TYPESTAR, TYPEMINSTAR */
820      OP_TYPEPOSPLUS, 0,       /* TYPEPLUS, TYPEMINPLUS */
821      OP_TYPEPOSQUERY, 0,      /* TYPEQUERY, TYPEMINQUERY */
822      OP_TYPEPOSUPTO, 0,       /* TYPEUPTO, TYPEMINUPTO */
823      0,                       /* TYPEEXACT */
824      0, 0, 0, 0,              /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */
825    
826      OP_CRPOSSTAR, 0,         /* CRSTAR, CRMINSTAR */
827      OP_CRPOSPLUS, 0,         /* CRPLUS, CRMINPLUS */
828      OP_CRPOSQUERY, 0,        /* CRQUERY, CRMINQUERY */
829      OP_CRPOSRANGE, 0,        /* CRRANGE, CRMINRANGE */
830      0, 0, 0, 0,              /* CRPOS{STAR,PLUS,QUERY,RANGE} */
831    
832      0, 0, 0,                 /* CLASS, NCLASS, XCLASS */
833      0, 0,                    /* REF, REFI */
834      0, 0,                    /* DNREF, DNREFI */
835      0, 0                     /* RECURSE, CALLOUT */
836    };
837    
838    
839    
840  /*************************************************  /*************************************************
# Line 879  return (*p == CHAR_RIGHT_CURLY_BRACKET); Line 941  return (*p == CHAR_RIGHT_CURLY_BRACKET);
941  *************************************************/  *************************************************/
942    
943  /* This function is called when a \ has been encountered. It either returns a  /* This function is called when a \ has been encountered. It either returns a
944  positive value for a simple escape such as \n, or 0 for a data character  positive value for a simple escape such as \n, or 0 for a data character which
945  which will be placed in chptr. A backreference to group n is returned as  will be placed in chptr. A backreference to group n is returned as negative n.
946  negative n. When UTF-8 is enabled, a positive value greater than 255 may  When UTF-8 is enabled, a positive value greater than 255 may be returned in
947  be returned in chptr.  chptr. On entry, ptr is pointing at the \. On exit, it is on the final
948  On entry,ptr is pointing at the \. On exit, it is on the final character of the  character of the escape sequence.
 escape sequence.  
949    
950  Arguments:  Arguments:
951    ptrptr         points to the pattern position pointer    ptrptr         points to the pattern position pointer
952    chptr          points to the data character    chptr          points to a returned data character
953    errorcodeptr   points to the errorcode variable    errorcodeptr   points to the errorcode variable
954    bracount       number of previous extracting brackets    bracount       number of previous extracting brackets
955    options        the options bits    options        the options bits
# Line 1092  else Line 1153  else
1153      break;      break;
1154    
1155      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
1156      starting with one that is not zero is not straightforward. By experiment,      starting with one that is not zero is not straightforward. Perl has changed
1157      the way Perl works seems to be as follows:      over the years. Nowadays \g{} for backreferences and \o{} for octal are
1158        recommended to avoid the ambiguities in the old syntax.
1159    
1160      Outside a character class, the digits are read as a decimal number. If the      Outside a character class, the digits are read as a decimal number. If the
1161      number is less than 10, or if there are that many previous extracting      number is less than 8 (used to be 10), or if there are that many previous
1162      left brackets, then it is a back reference. Otherwise, up to three octal      extracting left brackets, then it is a back reference. Otherwise, up to
1163      digits are read to form an escaped byte. Thus \123 is likely to be octal      three octal digits are read to form an escaped byte. Thus \123 is likely to
1164      123 (cf \0123, which is octal 012 followed by the literal 3). If the octal      be octal 123 (cf \0123, which is octal 012 followed by the literal 3). If
1165      value is greater than 377, the least significant 8 bits are taken. Inside a      the octal value is greater than 377, the least significant 8 bits are
1166      character class, \ followed by a digit is always an octal number. */      taken. \8 and \9 are treated as the literal characters 8 and 9.
1167    
1168        Inside a character class, \ followed by a digit is always either a literal
1169        8 or 9 or an octal number. */
1170    
1171      case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:      case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
1172      case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:      case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
# Line 1128  else Line 1193  else
1193          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
1194          break;          break;
1195          }          }
1196        if (s < 10 || s <= bracount)        if (s < 8 || s <= bracount)  /* Check for back reference */
1197          {          {
1198          escape = -s;          escape = -s;
1199          break;          break;
# Line 1136  else Line 1201  else
1201        ptr = oldptr;      /* Put the pointer back and fall through */        ptr = oldptr;      /* Put the pointer back and fall through */
1202        }        }
1203    
1204      /* Handle an octal number following \. If the first digit is 8 or 9, Perl      /* Handle a digit following \ when the number is not a back reference. If
1205      generates a binary zero byte and treats the digit as a following literal.      the first digit is 8 or 9, Perl used to generate a binary zero byte and
1206      Thus we have to pull back the pointer by one. */      then treat the digit as a following literal. At least by Perl 5.18 this
1207        changed so as not to insert the binary zero. */
1208    
1209      if ((c = *ptr) >= CHAR_8)      if ((c = *ptr) >= CHAR_8) break;
1210        {  
1211        ptr--;      /* Fall through with a digit less than 8 */
       c = 0;  
       break;  
       }  
1212    
1213      /* \0 always starts an octal number, but we may drop through to here with a      /* \0 always starts an octal number, but we may drop through to here with a
1214      larger first octal digit. The original code used just to take the least      larger first octal digit. The original code used just to take the least
# Line 1162  else Line 1225  else
1225  #endif  #endif
1226      break;      break;
1227    
1228      /* \x is complicated. \x{ddd} is a character number which can be greater      /* \o is a relatively new Perl feature, supporting a more general way of
1229      than 0xff in utf or non-8bit mode, but only if the ddd are hex digits.      specifying character codes in octal. The only supported form is \o{ddd}. */
1230      If not, { is treated as a data character. */  
1231        case CHAR_o:
1232        if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else
1233          {
1234          ptr += 2;
1235          c = 0;
1236          overflow = FALSE;
1237          while (*ptr >= CHAR_0 && *ptr <= CHAR_7)
1238            {
1239            register pcre_uint32 cc = *ptr++;
1240            if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
1241    #ifdef COMPILE_PCRE32
1242            if (c >= 0x20000000l) { overflow = TRUE; break; }
1243    #endif
1244            c = (c << 3) + cc - CHAR_0 ;
1245    #if defined COMPILE_PCRE8
1246            if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1247    #elif defined COMPILE_PCRE16
1248            if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1249    #elif defined COMPILE_PCRE32
1250            if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1251    #endif
1252            }
1253          if (overflow)
1254            {
1255            while (*ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++;
1256            *errorcodeptr = ERR34;
1257            }
1258          else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
1259            {
1260            if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1261            }
1262          else *errorcodeptr = ERR80;
1263          }
1264        break;
1265    
1266        /* \x is complicated. In JavaScript, \x must be followed by two hexadecimal
1267        numbers. Otherwise it is a lowercase x letter. */
1268    
1269      case CHAR_x:      case CHAR_x:
1270      if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)      if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
1271        {        {
       /* In JavaScript, \x must be followed by two hexadecimal numbers.  
       Otherwise it is a lowercase x letter. */  
1272        if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0        if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
1273          && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)          && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
1274          {          {
# Line 1187  else Line 1285  else
1285  #endif  #endif
1286            }            }
1287          }          }
1288        break;        }    /* End JavaScript handling */
       }  
1289    
1290      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      /* Handle \x in Perl's style. \x{ddd} is a character number which can be
1291        {      greater than 0xff in utf or non-8bit mode, but only if the ddd are hex
1292        const pcre_uchar *pt = ptr + 2;      digits. If not, { used to be treated as a data character. However, Perl
1293        seems to read hex digits up to the first non-such, and ignore the rest, so
1294        that, for example \x{zz} matches a binary zero. This seems crazy, so PCRE
1295        now gives an error. */
1296    
1297        c = 0;      else
1298        overflow = FALSE;        {
1299        while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)        if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
1300          {          {
1301          register pcre_uint32 cc = *pt++;          ptr += 2;
1302          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */          c = 0;
1303            overflow = FALSE;
1304            while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0)
1305              {
1306              register pcre_uint32 cc = *ptr++;
1307              if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
1308    
1309  #ifdef COMPILE_PCRE32  #ifdef COMPILE_PCRE32
1310          if (c >= 0x10000000l) { overflow = TRUE; break; }            if (c >= 0x10000000l) { overflow = TRUE; break; }
1311  #endif  #endif
1312    
1313  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1314          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */            if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
1315          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
1316  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
1317          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */            if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
1318          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
1319  #endif  #endif
1320    
1321  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
1322          if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }            if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1323  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16
1324          if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }            if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1325  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
1326          if (utf && c > 0x10ffffU) { overflow = TRUE; break; }            if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1327  #endif  #endif
1328          }            }
1329    
1330        if (overflow)          if (overflow)
1331          {            {
1332          while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;            while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0) ptr++;
1333          *errorcodeptr = ERR34;            *errorcodeptr = ERR34;
1334          }            }
1335    
1336        if (*pt == CHAR_RIGHT_CURLY_BRACKET)          else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
1337          {            {
1338          if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;            if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1339          ptr = pt;            }
         break;  
         }  
1340    
1341        /* If the sequence of hex digits does not end with '}', then we don't          /* If the sequence of hex digits does not end with '}', give an error.
1342        recognize this construct; fall through to the normal \x handling. */          We used just to recognize this construct and fall through to the normal
1343        }          \x handling, but nowadays Perl gives an error, which seems much more
1344            sensible, so we do too. */
1345    
1346      /* Read just a single-byte hex-defined char */          else *errorcodeptr = ERR79;
1347            }   /* End of \x{} processing */
1348    
1349      c = 0;        /* Read a single-byte hex-defined char (up to two hex digits after \x) */
1350      while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)  
1351        {        else
1352        pcre_uint32 cc;                          /* Some compilers don't like */          {
1353        cc = *(++ptr);                           /* ++ in initializers */          c = 0;
1354            while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
1355              {
1356              pcre_uint32 cc;                          /* Some compilers don't like */
1357              cc = *(++ptr);                           /* ++ in initializers */
1358  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1359        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */            if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
1360        c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));            c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
1361  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
1362        if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */            if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */
1363        c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));            c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
1364  #endif  #endif
1365        }            }
1366            }     /* End of \xdd handling */
1367          }       /* End of Perl-style \x handling */
1368      break;      break;
1369    
1370      /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.      /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.
# Line 1764  for (;;) Line 1875  for (;;)
1875    
1876      switch (*cc)      switch (*cc)
1877        {        {
       case OP_CRPLUS:  
       case OP_CRMINPLUS:  
1878        case OP_CRSTAR:        case OP_CRSTAR:
1879        case OP_CRMINSTAR:        case OP_CRMINSTAR:
1880          case OP_CRPLUS:
1881          case OP_CRMINPLUS:
1882        case OP_CRQUERY:        case OP_CRQUERY:
1883        case OP_CRMINQUERY:        case OP_CRMINQUERY:
1884          case OP_CRPOSSTAR:
1885          case OP_CRPOSPLUS:
1886          case OP_CRPOSQUERY:
1887        return -1;        return -1;
1888    
1889        case OP_CRRANGE:        case OP_CRRANGE:
1890        case OP_CRMINRANGE:        case OP_CRMINRANGE:
1891          case OP_CRPOSRANGE:
1892        if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;        if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;
1893        branchlength += (int)GET2(cc,1);        branchlength += (int)GET2(cc,1);
1894        cc += 1 + 2 * IMM2_SIZE;        cc += 1 + 2 * IMM2_SIZE;
# Line 2366  for (code = first_significant_code(code Line 2481  for (code = first_significant_code(code
2481        case OP_CRMINSTAR:        case OP_CRMINSTAR:
2482        case OP_CRQUERY:        case OP_CRQUERY:
2483        case OP_CRMINQUERY:        case OP_CRMINQUERY:
2484          case OP_CRPOSSTAR:
2485          case OP_CRPOSQUERY:
2486        break;        break;
2487    
2488        default:                   /* Non-repeat => class must match */        default:                   /* Non-repeat => class must match */
2489        case OP_CRPLUS:            /* These repeats aren't empty */        case OP_CRPLUS:            /* These repeats aren't empty */
2490        case OP_CRMINPLUS:        case OP_CRMINPLUS:
2491          case OP_CRPOSPLUS:
2492        return FALSE;        return FALSE;
2493    
2494        case OP_CRRANGE:        case OP_CRRANGE:
2495        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2496          case OP_CRPOSRANGE:
2497        if (GET2(ccode, 1) > 0) return FALSE;  /* Minimum > 0 */        if (GET2(ccode, 1) > 0) return FALSE;  /* Minimum > 0 */
2498        break;        break;
2499        }        }
# Line 2653  switch(ptype) Line 2772  switch(ptype)
2772    /* Perl space used to exclude VT, but from Perl 5.18 it is included, which    /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
2773    means that Perl space and POSIX space are now identical. PCRE was changed    means that Perl space and POSIX space are now identical. PCRE was changed
2774    at release 8.34. */    at release 8.34. */
2775    
2776    case PT_SPACE:    /* Perl space */    case PT_SPACE:    /* Perl space */
2777    case PT_PXSPACE:  /* POSIX space */    case PT_PXSPACE:  /* POSIX space */
2778    return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||    switch(c)
2779            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||      {
2780            c == CHAR_FF || c == CHAR_CR)      HSPACE_CASES:
2781            == negated;      VSPACE_CASES:
2782        return negated;
2783    
2784        default:
2785        return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
2786        }
2787      break;  /* Control never reaches here */
2788    
2789    case PT_WORD:    case PT_WORD:
2790    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
# Line 2818  switch(c) Line 2943  switch(c)
2943      return code + 2;      return code + 2;
2944      }      }
2945    
2946    /* Convert only if we have anough space. */    /* Convert only if we have enough space. */
2947    
2948    clist_src = PRIV(ucd_caseless_sets) + code[1];    clist_src = PRIV(ucd_caseless_sets) + code[1];
2949    clist_dest = list + 2;    clist_dest = list + 2;
2950    code += 2;    code += 2;
2951    
2952    do {    do {
      /* Early return if there is not enough space. */  
2953       if (clist_dest >= list + 8)       if (clist_dest >= list + 8)
2954         {         {
2955           /* Early return if there is not enough space. This should never
2956           happen, since all clists are shorter than 5 characters now. */
2957         list[2] = code[0];         list[2] = code[0];
2958         list[3] = code[1];         list[3] = code[1];
2959         return code;         return code;
2960         }         }
2961       *clist_dest++ = *clist_src;       *clist_dest++ = *clist_src;
2962       }       }
2963     while(*clist_src++ != NOTACHAR);    while(*clist_src++ != NOTACHAR);
2964    
2965    /* Enough space to store all characters. */    /* All characters are stored. The terminating NOTACHAR
2966      is copied from the clist itself. */
2967    
2968    list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;    list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
2969    return code;    return code;
# Line 2859  switch(c) Line 2986  switch(c)
2986      case OP_CRMINSTAR:      case OP_CRMINSTAR:
2987      case OP_CRQUERY:      case OP_CRQUERY:
2988      case OP_CRMINQUERY:      case OP_CRMINQUERY:
2989        case OP_CRPOSSTAR:
2990        case OP_CRPOSQUERY:
2991      list[1] = TRUE;      list[1] = TRUE;
2992      end++;      end++;
2993      break;      break;
2994    
2995        case OP_CRPLUS:
2996        case OP_CRMINPLUS:
2997        case OP_CRPOSPLUS:
2998        end++;
2999        break;
3000    
3001      case OP_CRRANGE:      case OP_CRRANGE:
3002      case OP_CRMINRANGE:      case OP_CRMINRANGE:
3003        case OP_CRPOSRANGE:
3004      list[1] = (GET2(end, 1) == 0);      list[1] = (GET2(end, 1) == 0);
3005      end += 1 + 2 * IMM2_SIZE;      end += 1 + 2 * IMM2_SIZE;
3006      break;      break;
# Line 2895  Returns:      TRUE if the auto-possessif Line 3031  Returns:      TRUE if the auto-possessif
3031    
3032  static BOOL  static BOOL
3033  compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,  compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,
3034    const pcre_uint32* base_list)    const pcre_uint32* base_list, const pcre_uchar *base_end)
3035  {  {
3036  pcre_uchar c;  pcre_uchar c;
3037  pcre_uint32 list[8];  pcre_uint32 list[8];
3038  const pcre_uint32* chr_ptr;  const pcre_uint32* chr_ptr;
3039  const pcre_uint32* ochr_ptr;  const pcre_uint32* ochr_ptr;
3040  const pcre_uint32* list_ptr;  const pcre_uint32* list_ptr;
3041    const pcre_uchar *next_code;
3042    const pcre_uint8 *class_bits;
3043  pcre_uint32 chr;  pcre_uint32 chr;
3044    BOOL accepted;
3045    
3046    /* Note: base_list[1] records whether the current opcode has a greedy
3047    quantifier (represented by a non-zero value). This is different from
3048    other character type lists, which store here whether the character iterator
3049    can match an empty string (also represented by a non-zero value). */
3050    
3051  for(;;)  for(;;)
3052    {    {
# Line 2925  for(;;) Line 3069  for(;;)
3069    switch(c)    switch(c)
3070      {      {
3071      case OP_END:      case OP_END:
3072      /* TRUE only in greedy case. The non-greedy case could be replaced by an      case OP_KETRPOS:
3073      OP_EXACT, but it is probably not worth it. (And note that OP_EXACT uses      /* TRUE only in greedy case. The non-greedy case could be replaced by
3074      more memory, which we cannot get at this stage.) */      an OP_EXACT, but it is probably not worth it. (And note that OP_EXACT
3075        uses more memory, which we cannot get at this stage.) */
3076    
3077      return base_list[1] != 0;      return base_list[1] != 0;
3078    
3079      case OP_KET:      case OP_KET:
3080      /* If the bracket is capturing, and referenced by an OP_RECURSE, the      /* If the bracket is capturing, and referenced by an OP_RECURSE, or
3081      non-greedy case cannot be converted to a possessive form. We do not test      it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
3082      the bracket type at the moment, but we might do it in the future to improve      cannot be converted to a possessive form. */
     this condition. (But note that recursive calls are always atomic.) */  
3083    
3084      if (base_list[1] == 0) return FALSE;      if (base_list[1] == 0) return FALSE;
3085    
3086        switch(*(code - GET(code, 1)))
3087          {
3088          case OP_ASSERT:
3089          case OP_ASSERT_NOT:
3090          case OP_ASSERTBACK:
3091          case OP_ASSERTBACK_NOT:
3092          case OP_ONCE:
3093          case OP_ONCE_NC:
3094          /* Atomic sub-patterns and assertions can always auto-possessify their
3095          last iterator. */
3096          return TRUE;
3097          }
3098    
3099        code += PRIV(OP_lengths)[c];
3100        continue;
3101    
3102        case OP_ONCE:
3103        case OP_ONCE_NC:
3104        case OP_BRA:
3105        case OP_CBRA:
3106        next_code = code;
3107        do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
3108    
3109        /* We do not support repeated brackets, because they can lead to
3110        infinite recursion. */
3111    
3112        if (*next_code != OP_KET) return FALSE;
3113    
3114        next_code = code + GET(code, 1);
3115        code += PRIV(OP_lengths)[c];
3116    
3117        while (*next_code == OP_ALT)
3118          {
3119          if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE;
3120          code = next_code + 1 + LINK_SIZE;
3121          next_code += GET(next_code, 1);
3122          }
3123        continue;
3124    
3125        case OP_BRAZERO:
3126        case OP_BRAMINZERO:
3127    
3128        next_code = code + 1;
3129        if (*next_code != OP_BRA && *next_code != OP_CBRA)
3130          return FALSE;
3131    
3132        do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
3133    
3134        /* We do not support repeated brackets, because they can lead to
3135        infinite recursion. */
3136        if (*next_code != OP_KET) return FALSE;
3137    
3138        /* The bracket content will be checked by the
3139        OP_BRA/OP_CBRA case above. */
3140        next_code += 1 + LINK_SIZE;
3141        if (!compare_opcodes(next_code, utf, cd, base_list, base_end)) return FALSE;
3142    
3143      code += PRIV(OP_lengths)[c];      code += PRIV(OP_lengths)[c];
3144      continue;      continue;
3145      }      }
# Line 2968  for(;;) Line 3170  for(;;)
3170      {      {
3171      pcre_uint32 leftop, rightop;      pcre_uint32 leftop, rightop;
3172    
     if (list[1] != 0) return FALSE;   /* Must match at least one character */  
3173      leftop = base_list[0];      leftop = base_list[0];
3174      rightop = list[0];      rightop = list[0];
3175    
3176  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3177        accepted = FALSE; /* Always set in non-unicode case. */
3178      if (leftop == OP_PROP || leftop == OP_NOTPROP)      if (leftop == OP_PROP || leftop == OP_NOTPROP)
3179        {        {
3180        if (rightop == OP_EOD) return TRUE;        if (rightop == OP_EOD)
3181        if (rightop == OP_PROP || rightop == OP_NOTPROP)          accepted = TRUE;
3182          else if (rightop == OP_PROP || rightop == OP_NOTPROP)
3183          {          {
3184          int n;          int n;
3185          const pcre_uint8 *p;          const pcre_uint8 *p;
# Line 2997  for(;;) Line 3200  for(;;)
3200          n = propposstab[base_list[2]][list[2]];          n = propposstab[base_list[2]][list[2]];
3201          switch(n)          switch(n)
3202            {            {
3203            case 0: return FALSE;            case 0: break;
3204            case 1: return bothprop;            case 1: accepted = bothprop; break;
3205            case 2: return (base_list[3] == list[3]) != same;            case 2: accepted = (base_list[3] == list[3]) != same; break;
3206            case 3: return !same;            case 3: accepted = !same; break;
3207    
3208            case 4:  /* Left general category, right particular category */            case 4:  /* Left general category, right particular category */
3209            return risprop && catposstab[base_list[3]][list[3]] == same;            accepted = risprop && catposstab[base_list[3]][list[3]] == same;
3210              break;
3211    
3212            case 5:  /* Right general category, left particular category */            case 5:  /* Right general category, left particular category */
3213            return lisprop && catposstab[list[3]][base_list[3]] == same;            accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
3214              break;
3215    
3216            /* This code is logically tricky. Think hard before fiddling with it.            /* This code is logically tricky. Think hard before fiddling with it.
3217            The posspropstab table has four entries per row. Each row relates to            The posspropstab table has four entries per row. Each row relates to
3218            one of PCRE's special properties such as ALNUM or SPACE or WORD.            one of PCRE's special properties such as ALNUM or SPACE or WORD.
3219            Only WORD actually needs all four entries, but using repeats for the            Only WORD actually needs all four entries, but using repeats for the
3220            others means they can all use the same code below.            others means they can all use the same code below.
3221    
3222            The first two entries in each row are Unicode general categories, and            The first two entries in each row are Unicode general categories, and
3223            apply always, because all the characters they include are part of the            apply always, because all the characters they include are part of the
3224            PCRE character set. The third and fourth entries are a general and a            PCRE character set. The third and fourth entries are a general and a
# Line 3023  for(;;) Line 3228  for(;;)
3228            category contains more characters than the specials that are defined            category contains more characters than the specials that are defined
3229            for the property being tested against. Therefore, it cannot be used            for the property being tested against. Therefore, it cannot be used
3230            in a NOTPROP case.            in a NOTPROP case.
3231    
3232            Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.            Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
3233            Underscore is covered by ucp_P or ucp_Po. */            Underscore is covered by ucp_P or ucp_Po. */
3234    
# Line 3031  for(;;) Line 3236  for(;;)
3236            case 7:  /* Left space vs right general category */            case 7:  /* Left space vs right general category */
3237            case 8:  /* Left word vs right general category */            case 8:  /* Left word vs right general category */
3238            p = posspropstab[n-6];            p = posspropstab[n-6];
3239            return risprop && lisprop ==            accepted = risprop && lisprop ==
3240              (list[3] != p[0] &&              (list[3] != p[0] &&
3241               list[3] != p[1] &&               list[3] != p[1] &&
3242              (list[3] != p[2] || !lisprop));              (list[3] != p[2] || !lisprop));
3243              break;
3244    
3245            case 9:   /* Right alphanum vs left general category */            case 9:   /* Right alphanum vs left general category */
3246            case 10:  /* Right space vs left general category */            case 10:  /* Right space vs left general category */
3247            case 11:  /* Right word vs left general category */            case 11:  /* Right word vs left general category */
3248            p = posspropstab[n-9];            p = posspropstab[n-9];
3249            return lisprop && risprop ==            accepted = lisprop && risprop ==
3250              (base_list[3] != p[0] &&              (base_list[3] != p[0] &&
3251               base_list[3] != p[1] &&               base_list[3] != p[1] &&
3252              (base_list[3] != p[2] || !risprop));              (base_list[3] != p[2] || !risprop));
3253              break;
3254    
3255            case 12:  /* Left alphanum vs right particular category */            case 12:  /* Left alphanum vs right particular category */
3256            case 13:  /* Left space vs right particular category */            case 13:  /* Left space vs right particular category */
3257            case 14:  /* Left word vs right particular category */            case 14:  /* Left word vs right particular category */
3258            p = posspropstab[n-12];            p = posspropstab[n-12];
3259            return risprop && lisprop ==            accepted = risprop && lisprop ==
3260              (catposstab[p[0]][list[3]] &&              (catposstab[p[0]][list[3]] &&
3261               catposstab[p[1]][list[3]] &&               catposstab[p[1]][list[3]] &&
3262              (list[3] != p[3] || !lisprop));              (list[3] != p[3] || !lisprop));
3263              break;
3264    
3265            case 15:  /* Right alphanum vs left particular category */            case 15:  /* Right alphanum vs left particular category */
3266            case 16:  /* Right space vs left particular category */            case 16:  /* Right space vs left particular category */
3267            case 17:  /* Right word vs left particular category */            case 17:  /* Right word vs left particular category */
3268            p = posspropstab[n-15];            p = posspropstab[n-15];
3269            return lisprop && risprop ==            accepted = lisprop && risprop ==
3270              (catposstab[p[0]][base_list[3]] &&              (catposstab[p[0]][base_list[3]] &&
3271               catposstab[p[1]][base_list[3]] &&               catposstab[p[1]][base_list[3]] &&
3272              (base_list[3] != p[3] || !risprop));              (base_list[3] != p[3] || !risprop));
3273              break;
3274            }            }
3275          }          }
       return FALSE;  
3276        }        }
3277    
3278      else      else
3279  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3280    
3281      return leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&      accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
3282             rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&             rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
3283             autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];             autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
3284    
3285        if (!accepted)
3286          return FALSE;
3287    
3288        if (list[1] == 0) return TRUE;
3289        /* Might be an empty repeat. */
3290        continue;
3291      }      }
3292    
3293    /* Control reaches here only if one of the items is a small character list.    /* Control reaches here only if one of the items is a small character list.
# Line 3193  for(;;) Line 3408  for(;;)
3408          return FALSE;          return FALSE;
3409        break;        break;
3410    
       /* The class comparisons work only when the class is the second item  
       of the pair, because there are at present no possessive forms of the  
       class opcodes. Note also that the "code" variable that is used below  
       points after the second item, and that the pointer for the first item  
       is not available, so even if there were possessive forms of the class  
       opcodes, the correct comparison could not be done. */  
   
3411        case OP_NCLASS:        case OP_NCLASS:
3412        if (chr > 255) return FALSE;        if (chr > 255) return FALSE;
3413        /* Fall through */        /* Fall through */
3414    
3415        case OP_CLASS:        case OP_CLASS:
       if (list_ptr != list) return FALSE;   /* Class is first opcode */  
3416        if (chr > 255) break;        if (chr > 255) break;
3417        if ((((pcre_uint8 *)(code - list_ptr[2] + 1))[chr >> 3] & (1 << (chr & 7))) != 0)        class_bits = (pcre_uint8 *)((list_ptr == list ? code : base_end) - list_ptr[2]);
3418          if ((class_bits[chr >> 3] & (1 << (chr & 7))) != 0)
3419          return FALSE;          return FALSE;
3420        break;        break;
3421    
3422  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3423        case OP_XCLASS:        case OP_XCLASS:
3424        if (list_ptr != list) return FALSE;   /* Class is first opcode */        if (list_ptr != list) return FALSE;   /* Class is first opcode */
3425        if (PRIV(xclass)(chr, code - list_ptr[2] + 1 + LINK_SIZE, utf))        if (PRIV(xclass)(chr, code - list_ptr[2] + LINK_SIZE, utf))
3426          return FALSE;          return FALSE;
3427        break;        break;
3428  #endif  #endif
# Line 3257  auto_possessify(pcre_uchar *code, BOOL u Line 3465  auto_possessify(pcre_uchar *code, BOOL u
3465  {  {
3466  register pcre_uchar c;  register pcre_uchar c;
3467  const pcre_uchar *end;  const pcre_uchar *end;
3468    pcre_uchar *repeat_code;
3469  pcre_uint32 list[8];  pcre_uint32 list[8];
3470    
3471  for (;;)  for (;;)
# Line 3270  for (;;) Line 3479  for (;;)
3479        get_chr_property_list(code, utf, cd->fcc, list) : NULL;        get_chr_property_list(code, utf, cd->fcc, list) : NULL;
3480      list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;      list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
3481    
3482      if (end != NULL && compare_opcodes(end, utf, cd, list))      if (end != NULL && compare_opcodes(end, utf, cd, list, end))
3483        {        {
3484        switch(c)        switch(c)
3485          {          {
# Line 3309  for (;;) Line 3518  for (;;)
3518        }        }
3519      c = *code;      c = *code;
3520      }      }
3521      else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
3522        {
3523    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3524        if (c == OP_XCLASS)
3525          repeat_code = code + 1 + GET(code, 1);
3526        else
3527    #endif
3528          repeat_code = code + 1 + (32 / sizeof(pcre_uchar));
3529    
3530        c = *repeat_code;
3531        if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
3532          {
3533          /* end must not be NULL. */
3534          end = get_chr_property_list(code, utf, cd->fcc, list);
3535    
3536          list[1] = (c & 1) == 0;
3537    
3538          if (compare_opcodes(end, utf, cd, list, end))
3539            {
3540            switch (c)
3541              {
3542              case OP_CRSTAR:
3543              *repeat_code = OP_CRPOSSTAR;
3544              break;
3545    
3546              case OP_CRPLUS:
3547              *repeat_code = OP_CRPOSPLUS;
3548              break;
3549    
3550              case OP_CRQUERY:
3551              *repeat_code = OP_CRPOSQUERY;
3552              break;
3553    
3554              case OP_CRRANGE:
3555              *repeat_code = OP_CRPOSRANGE;
3556              break;
3557              }
3558            }
3559          }
3560        c = *code;
3561        }
3562    
3563    switch(c)    switch(c)
3564      {      {
# Line 3335  for (;;) Line 3585  for (;;)
3585        code += 2;        code += 2;
3586      break;      break;
3587    
3588    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3589      case OP_XCLASS:      case OP_XCLASS:
3590      code += GET(code, 1);      code += GET(code, 1);
3591      break;      break;
3592    #endif
3593    
3594      case OP_MARK:      case OP_MARK:
3595      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
# Line 4201  for (;; ptr++) Line 4453  for (;; ptr++)
4453        }        }
4454      }      }
4455    
   /* Fill in length of a previous callout, except when the next thing is  
   a quantifier. */  
   
4456    is_quantifier =    is_quantifier =
4457      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
4458      (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));      (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
4459    
4460    if (!is_quantifier && previous_callout != NULL &&    /* Fill in length of a previous callout, except when the next thing is a
4461      quantifier or when processing a property substitution string in UCP mode. */
4462    
4463      if (!is_quantifier && previous_callout != NULL && nestptr == NULL &&
4464         after_manual_callout-- <= 0)         after_manual_callout-- <= 0)
4465      {      {
4466      if (lengthptr == NULL)      /* Don't attempt in pre-compile phase */      if (lengthptr == NULL)      /* Don't attempt in pre-compile phase */
# Line 4239  for (;; ptr++) Line 4491  for (;; ptr++)
4491        }        }
4492      }      }
4493    
4494    /* No auto callout for quantifiers. */    /* No auto callout for quantifiers, or while processing property strings that
4495      are substituted for \w etc in UCP mode. */
4496    
4497    if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier)    if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL)
4498      {      {
4499      previous_callout = code;      previous_callout = code;
4500      code = auto_callout(code, ptr, cd);      code = auto_callout(code, ptr, cd);
# Line 4631  for (;; ptr++) Line 4884  for (;; ptr++)
4884              5.18. Before PCRE 8.34, we had to preserve the VT bit if it was              5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
4885              previously set by something earlier in the character class.              previously set by something earlier in the character class.
4886              Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so              Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
4887              we could just adjust the appropriate bit. From PCRE 8.34 we no              we could just adjust the appropriate bit. From PCRE 8.34 we no
4888              longer treat \s and \S specially. */              longer treat \s and \S specially. */
4889    
4890              case ESC_s:              case ESC_s:
# Line 5729  for (;; ptr++) Line 5982  for (;; ptr++)
5982        goto FAILED;        goto FAILED;
5983        }        }
5984    
5985      /* If the character following a repeat is '+', or if certain optimization      /* If the character following a repeat is '+', possessive_quantifier is
5986      tests above succeeded, possessive_quantifier is TRUE. For some opcodes,      TRUE. For some opcodes, there are special alternative opcodes for this
5987      there are special alternative opcodes for this case. For anything else, we      case. For anything else, we wrap the entire repeated item inside OP_ONCE
5988      wrap the entire repeated item inside OP_ONCE brackets. Logically, the '+'      brackets. Logically, the '+' notation is just syntactic sugar, taken from
5989      notation is just syntactic sugar, taken from Sun's Java package, but the      Sun's Java package, but the special opcodes can optimize it.
     special opcodes can optimize it.  
5990    
5991      Some (but not all) possessively repeated subpatterns have already been      Some (but not all) possessively repeated subpatterns have already been
5992      completely handled in the code just above. For them, possessive_quantifier      completely handled in the code just above. For them, possessive_quantifier
5993      is always FALSE at this stage.      is always FALSE at this stage. Note that the repeated item starts at
5994        tempcode, not at previous, which might be the first part of a string whose
5995      Note that the repeated item starts at tempcode, not at previous, which      (former) last char we repeated. */
     might be the first part of a string whose (former) last char we repeated.  
   
     Possessifying an 'exact' quantifier has no effect, so we can ignore it. But  
     an 'upto' may follow. We skip over an 'exact' item, and then test the  
     length of what remains before proceeding. */  
5996    
5997      if (possessive_quantifier)      if (possessive_quantifier)
5998        {        {
5999        int len;        int len;
6000    
6001        if (*tempcode == OP_TYPEEXACT)        /* Possessifying an EXACT quantifier has no effect, so we can ignore it.
6002          However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6},
6003          {5,}, or {5,10}). We skip over an EXACT item; if the length of what
6004          remains is greater than zero, there's a further opcode that can be
6005          handled. If not, do nothing, leaving the EXACT alone. */
6006    
6007          switch(*tempcode)
6008            {
6009            case OP_TYPEEXACT:
6010          tempcode += PRIV(OP_lengths)[*tempcode] +          tempcode += PRIV(OP_lengths)[*tempcode] +
6011            ((tempcode[1 + IMM2_SIZE] == OP_PROP            ((tempcode[1 + IMM2_SIZE] == OP_PROP
6012            || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);            || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
6013            break;
6014    
6015        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)          /* CHAR opcodes are used for exacts whose count is 1. */
6016          {  
6017            case OP_CHAR:
6018            case OP_CHARI:
6019            case OP_NOT:
6020            case OP_NOTI:
6021            case OP_EXACT:
6022            case OP_EXACTI:
6023            case OP_NOTEXACT:
6024            case OP_NOTEXACTI:
6025          tempcode += PRIV(OP_lengths)[*tempcode];          tempcode += PRIV(OP_lengths)[*tempcode];
6026  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
6027          if (utf && HAS_EXTRALEN(tempcode[-1]))          if (utf && HAS_EXTRALEN(tempcode[-1]))
6028            tempcode += GET_EXTRALEN(tempcode[-1]);            tempcode += GET_EXTRALEN(tempcode[-1]);
6029  #endif  #endif
6030            break;
6031    
6032            /* For the class opcodes, the repeat operator appears at the end;
6033            adjust tempcode to point to it. */
6034    
6035            case OP_CLASS:
6036            case OP_NCLASS:
6037            tempcode += 1 + 32/sizeof(pcre_uchar);
6038            break;
6039    
6040    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6041            case OP_XCLASS:
6042            tempcode += GET(tempcode, 1);
6043            break;
6044    #endif
6045          }          }
6046    
6047          /* If tempcode is equal to code (which points to the end of the repeated
6048          item), it means we have skipped an EXACT item but there is no following
6049          QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. In
6050          all other cases, tempcode will be pointing to the repeat opcode, and will
6051          be less than code, so the value of len will be greater than 0. */
6052    
6053        len = (int)(code - tempcode);        len = (int)(code - tempcode);
6054          if (len > 0)
6055            {
6056            unsigned int repcode = *tempcode;
6057    
6058            /* There is a table for possessifying opcodes, all of which are less
6059            than OP_CALLOUT. A zero entry means there is no possessified version.
6060            */
6061    
6062            if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0)
6063              *tempcode = opcode_possessify[repcode];
6064    
6065            /* For opcodes without a special possessified version, wrap the item in
6066            ONCE brackets. Because we are moving code along, we must ensure that any
6067            pending recursive references are updated. */
6068    
6069            else
6070              {
6071              *code = OP_END;
6072              adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
6073              memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6074              code += 1 + LINK_SIZE;
6075              len += 1 + LINK_SIZE;
6076              tempcode[0] = OP_ONCE;
6077              *code++ = OP_KET;
6078              PUTINC(code, 0, len);
6079              PUT(tempcode, 1, len);
6080              }
6081            }
6082    
6083    #ifdef NEVER
6084        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
6085          {          {
6086          case OP_STAR:  *tempcode = OP_POSSTAR; break;          case OP_STAR:  *tempcode = OP_POSSTAR; break;
# Line 5793  for (;; ptr++) Line 6108  for (;; ptr++)
6108          case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break;          case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break;
6109          case OP_TYPEUPTO:  *tempcode = OP_TYPEPOSUPTO; break;          case OP_TYPEUPTO:  *tempcode = OP_TYPEPOSUPTO; break;
6110    
6111            case OP_CRSTAR:   *tempcode = OP_CRPOSSTAR; break;
6112            case OP_CRPLUS:   *tempcode = OP_CRPOSPLUS; break;
6113            case OP_CRQUERY:  *tempcode = OP_CRPOSQUERY; break;
6114            case OP_CRRANGE:  *tempcode = OP_CRPOSRANGE; break;
6115    
6116          /* Because we are moving code along, we must ensure that any          /* Because we are moving code along, we must ensure that any
6117          pending recursive references are updated. */          pending recursive references are updated. */
6118    
# Line 5808  for (;; ptr++) Line 6128  for (;; ptr++)
6128          PUT(tempcode, 1, len);          PUT(tempcode, 1, len);
6129          break;          break;
6130          }          }
6131    #endif
6132        }        }
6133    
6134      /* In all cases we no longer have a previous item. We also set the      /* In all cases we no longer have a previous item. We also set the
# Line 6048  for (;; ptr++) Line 6369  for (;; ptr++)
6369    
6370          /* Check for a test for a named group's having been set, using the Perl          /* Check for a test for a named group's having been set, using the Perl
6371          syntax (?(<name>) or (?('name'), and also allow for the original PCRE          syntax (?(<name>) or (?('name'), and also allow for the original PCRE
6372          syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). As names may          syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). As names may
6373          consist entirely of digits, there is scope for ambiguity. */          consist entirely of digits, there is scope for ambiguity. */
6374    
6375          else if (ptr[1] == CHAR_LESS_THAN_SIGN)          else if (ptr[1] == CHAR_LESS_THAN_SIGN)
# Line 6066  for (;; ptr++) Line 6387  for (;; ptr++)
6387            terminator = CHAR_NULL;            terminator = CHAR_NULL;
6388            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
6389            }            }
6390    
6391          /* When a name is one of a number of duplicates, a different opcode is          /* When a name is one of a number of duplicates, a different opcode is
6392          used and it needs more memory. Unfortunately we cannot tell whether a          used and it needs more memory. Unfortunately we cannot tell whether a
6393          name is a duplicate in the first pass, so we have to allow for more          name is a duplicate in the first pass, so we have to allow for more
6394          memory except when we know it is a relative numerical reference. */          memory except when we know it is a relative numerical reference. */
6395    
6396          if (refsign < 0 && lengthptr != NULL) *lengthptr += IMM2_SIZE;          if (refsign < 0 && lengthptr != NULL) *lengthptr += IMM2_SIZE;
6397    
6398          /* We now expect to read a name (possibly all digits); anything else          /* We now expect to read a name (possibly all digits); anything else
# Line 6095  for (;; ptr++) Line 6416  for (;; ptr++)
6416          namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
6417    
6418          /* Check the terminator */          /* Check the terminator */
6419    
6420          if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||          if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||
6421              *ptr++ != CHAR_RIGHT_PARENTHESIS)              *ptr++ != CHAR_RIGHT_PARENTHESIS)
6422            {            {
# Line 6132  for (;; ptr++) Line 6453  for (;; ptr++)
6453    
6454          /* Otherwise (did not start with "+" or "-"), start by looking for the          /* Otherwise (did not start with "+" or "-"), start by looking for the
6455          name. */          name. */
6456    
6457          slot = cd->name_table;          slot = cd->name_table;
6458          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
6459            {            {
# Line 6140  for (;; ptr++) Line 6461  for (;; ptr++)
6461            slot += cd->name_entry_size;            slot += cd->name_entry_size;
6462            }            }
6463    
6464          /* Found the named subpattern. If the name is duplicated, add one to          /* Found the named subpattern. If the name is duplicated, add one to
6465          the opcode to change CREF/RREF into DNCREF/DNRREF and insert          the opcode to change CREF/RREF into DNCREF/DNRREF and insert
6466          appropriate data values. Otherwise, just insert the unique subpattern          appropriate data values. Otherwise, just insert the unique subpattern
6467          number. */          number. */
6468    
6469          if (i < cd->names_found)          if (i < cd->names_found)
6470            {            {
6471            int offset = i++;            int offset = i++;
6472            int count = 1;            int count = 1;
6473            recno = GET2(slot, 0);   /* Number from first found */            recno = GET2(slot, 0);   /* Number from first found */
6474            for (; i < cd->names_found; i++)            for (; i < cd->names_found; i++)
6475              {              {
6476              slot += cd->name_entry_size;              slot += cd->name_entry_size;
6477              if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0) break;              if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0) break;
6478              count++;              count++;
6479              }              }
6480            if (count > 1)            if (count > 1)
6481              {              {
6482              PUT2(code, 2+LINK_SIZE, offset);              PUT2(code, 2+LINK_SIZE, offset);
6483              PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count);              PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count);
6484              skipbytes += IMM2_SIZE;              skipbytes += IMM2_SIZE;
6485              code[1+LINK_SIZE]++;              code[1+LINK_SIZE]++;
6486              }              }
6487            else  /* Not a duplicated name */            else  /* Not a duplicated name */
6488              {              {
6489              PUT2(code, 2+LINK_SIZE, recno);              PUT2(code, 2+LINK_SIZE, recno);
6490              }              }
6491            }            }
6492    
6493          /* If terminator == CHAR_NULL it means that the name followed directly          /* If terminator == CHAR_NULL it means that the name followed directly
# Line 8863  return (pcre32 *)re; Line 9184  return (pcre32 *)re;
9184  }  }
9185    
9186  /* End of pcre_compile.c */  /* End of pcre_compile.c */
9187    

Legend:
Removed from v.1365  
changed lines
  Added in v.1381

  ViewVC Help
Powered by ViewVC 1.1.5