/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1369 by ph10, Tue Oct 8 15:06:46 2013 UTC | revision 1370 by ph10, Wed Oct 9 10:18:26 2013 UTC
# Line 462  static const char error_texts[] = Line 462  static const char error_texts[] =
462    "POSIX collating elements are not supported\0"    "POSIX collating elements are not supported\0"
463    "this version of PCRE is compiled without UTF support\0"    "this version of PCRE is compiled without UTF support\0"
464    "spare error\0"  /** DEAD **/    "spare error\0"  /** DEAD **/
465    "character value in \\x{...} sequence is too large\0"    "character value in \\x{} or \\o{} is too large\0"
466    /* 35 */    /* 35 */
467    "invalid condition (?(0)\0"    "invalid condition (?(0)\0"
468    "\\C not allowed in lookbehind assertion\0"    "\\C not allowed in lookbehind assertion\0"
# Line 516  static const char error_texts[] = Line 516  static const char error_texts[] =
516    "character value in \\u.... sequence is too large\0"    "character value in \\u.... sequence is too large\0"
517    "invalid UTF-32 string\0"    "invalid UTF-32 string\0"
518    "setting UTF is disabled by the application\0"    "setting UTF is disabled by the application\0"
519      "non-hex character in \\x{} (closing brace missing?)\0"
520      /* 80 */
521      "non-octal character in \\o{} (closing brace missing?)\0"
522      "missing opening brace after \\o\0"
523    ;    ;
524    
525  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 1162  else Line 1166  else
1166      if (!utf && c > 0xff) *errorcodeptr = ERR51;      if (!utf && c > 0xff) *errorcodeptr = ERR51;
1167  #endif  #endif
1168      break;      break;
1169    
1170        /* \o is a relatively new Perl feature, supporting a more general way of
1171        specifying character codes in octal. The only supported form is \o{ddd}. */
1172    
1173        case CHAR_o:
1174        if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else
1175          {
1176          ptr += 2;
1177          c = 0;
1178          overflow = FALSE;
1179          while (*ptr >= CHAR_0 && *ptr <= CHAR_7)
1180            {
1181            register pcre_uint32 cc = *ptr++;
1182            if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
1183    #ifdef COMPILE_PCRE32
1184            if (c >= 0x10000000l) { overflow = TRUE; break; }
1185    #endif
1186            c = (c << 3) + cc - CHAR_0 ;
1187    #if defined COMPILE_PCRE8
1188            if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1189    #elif defined COMPILE_PCRE16
1190            if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1191    #elif defined COMPILE_PCRE32
1192            if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1193    #endif
1194            }
1195          if (overflow)
1196            {
1197            while (*ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++;
1198            *errorcodeptr = ERR34;
1199            }
1200          else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
1201            {
1202            if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1203            }
1204          else *errorcodeptr = ERR80;
1205          }
1206        break;
1207    
1208      /* \x is complicated. \x{ddd} is a character number which can be greater      /* \x is complicated. In JavaScript, \x must be followed by two hexadecimal
1209      than 0xff in utf or non-8bit mode, but only if the ddd are hex digits.      numbers. Otherwise it is a lowercase x letter. */
     If not, { is treated as a data character. */  
1210    
1211      case CHAR_x:      case CHAR_x:
1212      if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)      if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
1213        {        {
       /* In JavaScript, \x must be followed by two hexadecimal numbers.  
       Otherwise it is a lowercase x letter. */  
1214        if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0        if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
1215          && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)          && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
1216          {          {
# Line 1188  else Line 1227  else
1227  #endif  #endif
1228            }            }
1229          }          }
1230        break;        }    /* End JavaScript handling */
1231        }  
1232        /* Handle \x in Perl's style. \x{ddd} is a character number which can be
1233      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      greater than 0xff in utf or non-8bit mode, but only if the ddd are hex
1234        {      digits. If not, { used to be treated as a data character. However, Perl
1235        const pcre_uchar *pt = ptr + 2;      seems to read hex digits up to the first non-such, and ignore the rest, so
1236        that, for example \x{zz} matches a binary zero. This seems crazy, so PCRE
1237        c = 0;      now gives an error. */
1238        overflow = FALSE;  
1239        while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)      else
1240          {
1241          if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
1242          {          {
1243          register pcre_uint32 cc = *pt++;          ptr += 2;
1244          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */          c = 0;
1245            overflow = FALSE;
1246            while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0)
1247              {
1248              register pcre_uint32 cc = *ptr++;
1249              if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
1250    
1251  #ifdef COMPILE_PCRE32  #ifdef COMPILE_PCRE32
1252          if (c >= 0x10000000l) { overflow = TRUE; break; }            if (c >= 0x10000000l) { overflow = TRUE; break; }
1253  #endif  #endif
1254    
1255  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1256          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */            if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
1257          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
1258  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
1259          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */            if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
1260          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
1261  #endif  #endif
1262    
1263  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
1264          if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }            if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1265  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16
1266          if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }            if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1267  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
1268          if (utf && c > 0x10ffffU) { overflow = TRUE; break; }            if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1269  #endif  #endif
1270          }            }
1271    
1272        if (overflow)          if (overflow)
1273          {            {
1274          while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;            while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0) ptr++;
1275          *errorcodeptr = ERR34;            *errorcodeptr = ERR34;
1276          }            }
1277    
1278        if (*pt == CHAR_RIGHT_CURLY_BRACKET)          else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
1279          {            {
1280          if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;            if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1281          ptr = pt;            }
1282          break;  
1283          }          /* If the sequence of hex digits does not end with '}', give an error.
1284            We used just to recognize this construct and fall through to the normal
1285        /* If the sequence of hex digits does not end with '}', then we don't          \x handling, but nowadays Perl gives an error, which seems much more
1286        recognize this construct; fall through to the normal \x handling. */          sensible, so we do too. */
1287        }  
1288            else *errorcodeptr = ERR79;
1289            }   /* End of \x{} processing */
1290    
1291      /* Read just a single-byte hex-defined char */        /* Read a single-byte hex-defined char (up to two hex digits after \x) */
1292    
1293      c = 0;        else
1294      while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)          {
1295        {          c = 0;
1296        pcre_uint32 cc;                          /* Some compilers don't like */          while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
1297        cc = *(++ptr);                           /* ++ in initializers */            {
1298              pcre_uint32 cc;                          /* Some compilers don't like */
1299              cc = *(++ptr);                           /* ++ in initializers */
1300  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1301        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */            if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
1302        c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));            c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
1303  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
1304        if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */            if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */
1305        c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));            c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
1306  #endif  #endif
1307        }            }
1308            }     /* End of \xdd handling */
1309          }       /* End of Perl-style \x handling */
1310      break;      break;
1311    
1312      /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.      /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.

Legend:
Removed from v.1369  
changed lines
  Added in v.1370

  ViewVC Help
Powered by ViewVC 1.1.5