/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

Revision 1363 by ph10, Tue Oct 1 16:54:40 2013 UTC | Revision 1372 by ph10, Fri Oct 11 17:05:19 2013 UTC
# Line 462  static const char error_texts[] = Line 462  static const char error_texts[] =
462    "POSIX collating elements are not supported\0"    "POSIX collating elements are not supported\0"
463    "this version of PCRE is compiled without UTF support\0"    "this version of PCRE is compiled without UTF support\0"
464    "spare error\0"  /** DEAD **/    "spare error\0"  /** DEAD **/
465    "character value in \\x{...} sequence is too large\0"    "character value in \\x{} or \\o{} is too large\0"
466    /* 35 */    /* 35 */
467    "invalid condition (?(0)\0"    "invalid condition (?(0)\0"
468    "\\C not allowed in lookbehind assertion\0"    "\\C not allowed in lookbehind assertion\0"
# Line 516  static const char error_texts[] = Line 516  static const char error_texts[] =
516    "character value in \\u.... sequence is too large\0"    "character value in \\u.... sequence is too large\0"
517    "invalid UTF-32 string\0"    "invalid UTF-32 string\0"
518    "setting UTF is disabled by the application\0"    "setting UTF is disabled by the application\0"
519      "non-hex character in \\x{} (closing brace missing?)\0"
520      /* 80 */
521      "non-octal character in \\o{} (closing brace missing?)\0"
522      "missing opening brace after \\o\0"
523    ;    ;
524    
525  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 879  return (*p == CHAR_RIGHT_CURLY_BRACKET); Line 883  return (*p == CHAR_RIGHT_CURLY_BRACKET);
883  *************************************************/  *************************************************/
884    
885  /* This function is called when a \ has been encountered. It either returns a  /* This function is called when a \ has been encountered. It either returns a
886  positive value for a simple escape such as \n, or 0 for a data character  positive value for a simple escape such as \n, or 0 for a data character which
887  which will be placed in chptr. A backreference to group n is returned as  will be placed in chptr. A backreference to group n is returned as negative n.
888  negative n. When UTF-8 is enabled, a positive value greater than 255 may  When UTF-8 is enabled, a positive value greater than 255 may be returned in
889  be returned in chptr.  chptr. On entry, ptr is pointing at the \. On exit, it is on the final
890  On entry,ptr is pointing at the \. On exit, it is on the final character of the  character of the escape sequence.
 escape sequence.  
891    
892  Arguments:  Arguments:
893    ptrptr         points to the pattern position pointer    ptrptr         points to the pattern position pointer
894    chptr          points to the data character    chptr          points to a returned data character
895    errorcodeptr   points to the errorcode variable    errorcodeptr   points to the errorcode variable
896    bracount       number of previous extracting brackets    bracount       number of previous extracting brackets
897    options        the options bits    options        the options bits
# Line 1092  else Line 1095  else
1095      break;      break;
1096    
1097      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
1098      starting with one that is not zero is not straightforward. By experiment,      starting with one that is not zero is not straightforward. Perl has changed
1099      the way Perl works seems to be as follows:      over the years. Nowadays \g{} for backreferences and \o{} for octal are
1100        recommended to avoid the ambiguities in the old syntax.
1101    
1102      Outside a character class, the digits are read as a decimal number. If the      Outside a character class, the digits are read as a decimal number. If the
1103      number is less than 10, or if there are that many previous extracting      number is less than 8 (used to be 10), or if there are that many previous
1104      left brackets, then it is a back reference. Otherwise, up to three octal      extracting left brackets, then it is a back reference. Otherwise, up to
1105      digits are read to form an escaped byte. Thus \123 is likely to be octal      three octal digits are read to form an escaped byte. Thus \123 is likely to
1106      123 (cf \0123, which is octal 012 followed by the literal 3). If the octal      be octal 123 (cf \0123, which is octal 012 followed by the literal 3). If
1107      value is greater than 377, the least significant 8 bits are taken. Inside a      the octal value is greater than 377, the least significant 8 bits are
1108      character class, \ followed by a digit is always an octal number. */      taken. \8 and \9 are treated as the literal characters 8 and 9.
1109    
1110        Inside a character class, \ followed by a digit is always either a literal
1111        8 or 9 or an octal number. */
1112    
1113      case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:      case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
1114      case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:      case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
# Line 1128  else Line 1135  else
1135          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
1136          break;          break;
1137          }          }
1138        if (s < 10 || s <= bracount)        if (s < 8 || s <= bracount)  /* Check for back reference */
1139          {          {
1140          escape = -s;          escape = -s;
1141          break;          break;
# Line 1136  else Line 1143  else
1143        ptr = oldptr;      /* Put the pointer back and fall through */        ptr = oldptr;      /* Put the pointer back and fall through */
1144        }        }
1145    
1146      /* Handle an octal number following \. If the first digit is 8 or 9, Perl      /* Handle a digit following \ when the number is not a back reference. If
1147      generates a binary zero byte and treats the digit as a following literal.      the first digit is 8 or 9, Perl used to generate a binary zero byte and
1148      Thus we have to pull back the pointer by one. */      then treat the digit as a following literal. At least by Perl 5.18 this
1149        changed so as not to insert the binary zero. */
1150      if ((c = *ptr) >= CHAR_8)  
1151        {      if ((c = *ptr) >= CHAR_8) break;
1152        ptr--;  
1153        c = 0;      /* Fall through with a digit less than 8 */
       break;  
       }  
1154    
1155      /* \0 always starts an octal number, but we may drop through to here with a      /* \0 always starts an octal number, but we may drop through to here with a
1156      larger first octal digit. The original code used just to take the least      larger first octal digit. The original code used just to take the least
# Line 1161  else Line 1166  else
1166      if (!utf && c > 0xff) *errorcodeptr = ERR51;      if (!utf && c > 0xff) *errorcodeptr = ERR51;
1167  #endif  #endif
1168      break;      break;
1169    
1170        /* \o is a relatively new Perl feature, supporting a more general way of
1171        specifying character codes in octal. The only supported form is \o{ddd}. */
1172    
1173        case CHAR_o:
1174        if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else
1175          {
1176          ptr += 2;
1177          c = 0;
1178          overflow = FALSE;
1179          while (*ptr >= CHAR_0 && *ptr <= CHAR_7)
1180            {
1181            register pcre_uint32 cc = *ptr++;
1182            if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
1183    #ifdef COMPILE_PCRE32
1184            if (c >= 0x10000000l) { overflow = TRUE; break; }
1185    #endif
1186            c = (c << 3) + cc - CHAR_0 ;
1187    #if defined COMPILE_PCRE8
1188            if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1189    #elif defined COMPILE_PCRE16
1190            if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1191    #elif defined COMPILE_PCRE32
1192            if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1193    #endif
1194            }
1195          if (overflow)
1196            {
1197            while (*ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++;
1198            *errorcodeptr = ERR34;
1199            }
1200          else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
1201            {
1202            if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1203            }
1204          else *errorcodeptr = ERR80;
1205          }
1206        break;
1207    
1208      /* \x is complicated. \x{ddd} is a character number which can be greater      /* \x is complicated. In JavaScript, \x must be followed by two hexadecimal
1209      than 0xff in utf or non-8bit mode, but only if the ddd are hex digits.      numbers. Otherwise it is a lowercase x letter. */
     If not, { is treated as a data character. */  
1210    
1211      case CHAR_x:      case CHAR_x:
1212      if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)      if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
1213        {        {
       /* In JavaScript, \x must be followed by two hexadecimal numbers.  
       Otherwise it is a lowercase x letter. */  
1214        if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0        if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
1215          && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)          && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
1216          {          {
# Line 1187  else Line 1227  else
1227  #endif  #endif
1228            }            }
1229          }          }
1230        break;        }    /* End JavaScript handling */
1231        }  
1232        /* Handle \x in Perl's style. \x{ddd} is a character number which can be
1233      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      greater than 0xff in utf or non-8bit mode, but only if the ddd are hex
1234        {      digits. If not, { used to be treated as a data character. However, Perl
1235        const pcre_uchar *pt = ptr + 2;      seems to read hex digits up to the first non-such, and ignore the rest, so
1236        that, for example \x{zz} matches a binary zero. This seems crazy, so PCRE
1237        c = 0;      now gives an error. */
1238        overflow = FALSE;  
1239        while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)      else
1240          {
1241          if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
1242          {          {
1243          register pcre_uint32 cc = *pt++;          ptr += 2;
1244          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */          c = 0;
1245            overflow = FALSE;
1246            while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0)
1247              {
1248              register pcre_uint32 cc = *ptr++;
1249              if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
1250    
1251  #ifdef COMPILE_PCRE32  #ifdef COMPILE_PCRE32
1252          if (c >= 0x10000000l) { overflow = TRUE; break; }            if (c >= 0x10000000l) { overflow = TRUE; break; }
1253  #endif  #endif
1254    
1255  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1256          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */            if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
1257          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
1258  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
1259          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */            if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
1260          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
1261  #endif  #endif
1262    
1263  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
1264          if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }            if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1265  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16
1266          if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }            if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1267  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
1268          if (utf && c > 0x10ffffU) { overflow = TRUE; break; }            if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1269  #endif  #endif
1270          }            }
1271    
1272        if (overflow)          if (overflow)
1273          {            {
1274          while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;            while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0) ptr++;
1275          *errorcodeptr = ERR34;            *errorcodeptr = ERR34;
1276          }            }
1277    
1278        if (*pt == CHAR_RIGHT_CURLY_BRACKET)          else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
1279          {            {
1280          if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;            if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1281          ptr = pt;            }
1282          break;  
1283          }          /* If the sequence of hex digits does not end with '}', give an error.
1284            We used just to recognize this construct and fall through to the normal
1285        /* If the sequence of hex digits does not end with '}', then we don't          \x handling, but nowadays Perl gives an error, which seems much more
1286        recognize this construct; fall through to the normal \x handling. */          sensible, so we do too. */
1287        }  
1288            else *errorcodeptr = ERR79;
1289            }   /* End of \x{} processing */
1290    
1291      /* Read just a single-byte hex-defined char */        /* Read a single-byte hex-defined char (up to two hex digits after \x) */
1292    
1293      c = 0;        else
1294      while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)          {
1295        {          c = 0;
1296        pcre_uint32 cc;                          /* Some compilers don't like */          while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
1297        cc = *(++ptr);                           /* ++ in initializers */            {
1298              pcre_uint32 cc;                          /* Some compilers don't like */
1299              cc = *(++ptr);                           /* ++ in initializers */
1300  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1301        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */            if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
1302        c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));            c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
1303  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
1304        if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */            if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */
1305        c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));            c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
1306  #endif  #endif
1307        }            }
1308            }     /* End of \xdd handling */
1309          }       /* End of Perl-style \x handling */
1310      break;      break;
1311    
1312      /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.      /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.
# Line 1524  for (;;) Line 1577  for (;;)
1577    
1578      case OP_CALLOUT:      case OP_CALLOUT:
1579      case OP_CREF:      case OP_CREF:
1580      case OP_NCREF:      case OP_DNCREF:
1581      case OP_RREF:      case OP_RREF:
1582      case OP_NRREF:      case OP_DNRREF:
1583      case OP_DEF:      case OP_DEF:
1584      code += PRIV(OP_lengths)[*code];      code += PRIV(OP_lengths)[*code];
1585      break;      break;
# Line 1663  for (;;) Line 1716  for (;;)
1716      case OP_COMMIT:      case OP_COMMIT:
1717      case OP_CREF:      case OP_CREF:
1718      case OP_DEF:      case OP_DEF:
1719        case OP_DNCREF:
1720        case OP_DNRREF:
1721      case OP_DOLL:      case OP_DOLL:
1722      case OP_DOLLM:      case OP_DOLLM:
1723      case OP_EOD:      case OP_EOD:
1724      case OP_EODN:      case OP_EODN:
1725      case OP_FAIL:      case OP_FAIL:
     case OP_NCREF:  
     case OP_NRREF:  
1726      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
1727      case OP_PRUNE:      case OP_PRUNE:
1728      case OP_REVERSE:      case OP_REVERSE:
# Line 2650  switch(ptype) Line 2703  switch(ptype)
2703    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2704            PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;            PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
2705    
2706      /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
2707      means that Perl space and POSIX space are now identical. PCRE was changed
2708      at release 8.34. */
2709    
2710    case PT_SPACE:    /* Perl space */    case PT_SPACE:    /* Perl space */
   return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||  
           c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)  
           == negated;  
   
2711    case PT_PXSPACE:  /* POSIX space */    case PT_PXSPACE:  /* POSIX space */
2712    return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2713            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
# Line 4201  for (;; ptr++) Line 4254  for (;; ptr++)
4254        }        }
4255      }      }
4256    
   /* Fill in length of a previous callout, except when the next thing is  
   a quantifier. */  
   
4257    is_quantifier =    is_quantifier =
4258      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
4259      (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));      (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
4260    
4261    if (!is_quantifier && previous_callout != NULL &&    /* Fill in length of a previous callout, except when the next thing is a
4262      quantifier or when processing a property substitution string in UCP mode. */
4263    
4264      if (!is_quantifier && previous_callout != NULL && nestptr == NULL &&
4265         after_manual_callout-- <= 0)         after_manual_callout-- <= 0)
4266      {      {
4267      if (lengthptr == NULL)      /* Don't attempt in pre-compile phase */      if (lengthptr == NULL)      /* Don't attempt in pre-compile phase */
# Line 4239  for (;; ptr++) Line 4292  for (;; ptr++)
4292        }        }
4293      }      }
4294    
4295    /* No auto callout for quantifiers. */    /* No auto callout for quantifiers, or while processing property strings that
4296      are substituted for \w etc in UCP mode. */
4297    
4298    if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier)    if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL)
4299      {      {
4300      previous_callout = code;      previous_callout = code;
4301      code = auto_callout(code, ptr, cd);      code = auto_callout(code, ptr, cd);
# Line 4627  for (;; ptr++) Line 4681  for (;; ptr++)
4681              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
4682              continue;              continue;
4683    
4684              /* Perl 5.004 onwards omits VT from \s, but we must preserve it              /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
4685              if it was previously set by something earlier in the character              5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
4686              class. Luckily, the value of CHAR_VT is 0x0b in both ASCII and              previously set by something earlier in the character class.
4687              EBCDIC, so we lazily just adjust the appropriate bit. */              Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
4688                we could just adjust the appropriate bit. From PCRE 8.34 we no
4689                longer treat \s and \S specially. */
4690    
4691              case ESC_s:              case ESC_s:
4692              classbits[0] |= cbits[cbit_space];              for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
             classbits[1] |= cbits[cbit_space+1] & ~0x08;  
             for (c = 2; c < 32; c++) classbits[c] |= cbits[c+cbit_space];  
4693              continue;              continue;
4694    
4695              case ESC_S:              case ESC_S:
4696              should_flip_negation = TRUE;              should_flip_negation = TRUE;
4697              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
             classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */  
4698              continue;              continue;
4699    
4700              /* The rest apply in both UCP and non-UCP cases. */              /* The rest apply in both UCP and non-UCP cases. */
# Line 6031  for (;; ptr++) Line 6084  for (;; ptr++)
6084                 tempptr[2] == CHAR_LESS_THAN_SIGN))                 tempptr[2] == CHAR_LESS_THAN_SIGN))
6085            break;            break;
6086    
6087          /* Most other conditions use OP_CREF (a couple change to OP_RREF          /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all
6088          below), and all need to skip 1+IMM2_SIZE bytes at the start of the group. */          need to skip at least 1+IMM2_SIZE bytes at the start of the group. */
6089    
6090          code[1+LINK_SIZE] = OP_CREF;          code[1+LINK_SIZE] = OP_CREF;
6091          skipbytes = 1+IMM2_SIZE;          skipbytes = 1+IMM2_SIZE;
# Line 6048  for (;; ptr++) Line 6101  for (;; ptr++)
6101            }            }
6102    
6103          /* Check for a test for a named group's having been set, using the Perl          /* Check for a test for a named group's having been set, using the Perl
6104          syntax (?(<name>) or (?('name') */          syntax (?(<name>) or (?('name'), and also allow for the original PCRE
6105            syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). As names may
6106            consist entirely of digits, there is scope for ambiguity. */
6107    
6108          else if (ptr[1] == CHAR_LESS_THAN_SIGN)          else if (ptr[1] == CHAR_LESS_THAN_SIGN)
6109            {            {
# Line 6065  for (;; ptr++) Line 6120  for (;; ptr++)
6120            terminator = CHAR_NULL;            terminator = CHAR_NULL;
6121            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
6122            }            }
6123    
6124            /* When a name is one of a number of duplicates, a different opcode is
6125            used and it needs more memory. Unfortunately we cannot tell whether a
6126            name is a duplicate in the first pass, so we have to allow for more
6127            memory except when we know it is a relative numerical reference. */
6128    
6129            if (refsign < 0 && lengthptr != NULL) *lengthptr += IMM2_SIZE;
6130    
6131          /* We now expect to read a name; any thing else is an error */          /* We now expect to read a name (possibly all digits); any thing else
6132            is an error. In the case of all digits, also get it as a number. */
6133    
6134          if (!MAX_255(ptr[1]) || (cd->ctypes[ptr[1]] & ctype_word) == 0)          if (!MAX_255(ptr[1]) || (cd->ctypes[ptr[1]] & ctype_word) == 0)
6135            {            {
# Line 6075  for (;; ptr++) Line 6138  for (;; ptr++)
6138            goto FAILED;            goto FAILED;
6139            }            }
6140    
         /* Read the name, but also get it as a number if it's all digits */  
   
6141          recno = 0;          recno = 0;
6142          name = ++ptr;          name = ++ptr;
6143          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)
# Line 6087  for (;; ptr++) Line 6148  for (;; ptr++)
6148            }            }
6149          namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
6150    
6151            /* Check the terminator */
6152    
6153          if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||          if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||
6154              *ptr++ != CHAR_RIGHT_PARENTHESIS)              *ptr++ != CHAR_RIGHT_PARENTHESIS)
6155            {            {
# Line 6122  for (;; ptr++) Line 6185  for (;; ptr++)
6185            }            }
6186    
6187          /* Otherwise (did not start with "+" or "-"), start by looking for the          /* Otherwise (did not start with "+" or "-"), start by looking for the
6188          name. If we find a name, add one to the opcode to change OP_CREF or          name. */
6189          OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same,  
         except they record that the reference was originally to a name. The  
         information is used to check duplicate names. */  
   
6190          slot = cd->name_table;          slot = cd->name_table;
6191          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
6192            {            {
# Line 6134  for (;; ptr++) Line 6194  for (;; ptr++)
6194            slot += cd->name_entry_size;            slot += cd->name_entry_size;
6195            }            }
6196    
6197          /* Found the named subpattern */          /* Found the named subpattern. If the name is duplicated, add one to
6198            the opcode to change CREF/RREF into DNCREF/DNRREF and insert
6199            appropriate data values. Otherwise, just insert the unique subpattern
6200            number. */
6201    
6202          if (i < cd->names_found)          if (i < cd->names_found)
6203            {            {
6204            recno = GET2(slot, 0);            int offset = i++;
6205            PUT2(code, 2+LINK_SIZE, recno);            int count = 1;
6206            code[1+LINK_SIZE]++;            recno = GET2(slot, 0);   /* Number from first found */
6207              for (; i < cd->names_found; i++)
6208                {
6209                slot += cd->name_entry_size;
6210                if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0) break;
6211                count++;
6212                }
6213              if (count > 1)
6214                {
6215                PUT2(code, 2+LINK_SIZE, offset);
6216                PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count);
6217                skipbytes += IMM2_SIZE;
6218                code[1+LINK_SIZE]++;
6219                }
6220              else  /* Not a duplicated name */
6221                {
6222                PUT2(code, 2+LINK_SIZE, recno);
6223                }
6224            }            }
6225    
6226          /* If terminator == CHAR_NULL it means that the name followed directly          /* If terminator == CHAR_NULL it means that the name followed directly
# Line 7830  do { Line 7910  do {
7910       switch (*scode)       switch (*scode)
7911         {         {
7912         case OP_CREF:         case OP_CREF:
7913         case OP_NCREF:         case OP_DNCREF:
7914         case OP_RREF:         case OP_RREF:
7915         case OP_NRREF:         case OP_DNRREF:
7916         case OP_DEF:         case OP_DEF:
7917         return FALSE;         return FALSE;
7918    

Legend:
Removed from v.1363  
changed lines
  Added in v.1372

  ViewVC Help
Powered by ViewVC 1.1.5