/[pcre]/code/branches/pcre16/pcre_jit_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_jit_compile.c

Parent Directory | Revision Log | View Patch

revision 781 by zherczeg, Sat Dec 3 07:58:30 2011 UTC revision 782 by zherczeg, Sat Dec 3 23:58:37 2011 UTC
# Line 302  typedef struct compiler_common { Line 302  typedef struct compiler_common {
302  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
303    BOOL useucp;    BOOL useucp;
304  #endif  #endif
305    jump_list *utf8readchar;    jump_list *utfreadchar;
306    jump_list *utf8readtype8;  #ifdef COMPILE_PCRE8
307      jump_list *utfreadtype8;
308  #endif  #endif
309    #endif /* SUPPORT_UTF8 */
310  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
311    jump_list *getucd;    jump_list *getucd;
312  #endif  #endif
# Line 543  switch(*cc) Line 545  switch(*cc)
545    case OP_NOTPOSPLUSI:    case OP_NOTPOSPLUSI:
546    case OP_NOTPOSQUERYI:    case OP_NOTPOSQUERYI:
547    cc += 2;    cc += 2;
548  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
549    if (common->utf && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
550  #endif  #endif
551    return cc;    return cc;
552    
# Line 565  switch(*cc) Line 567  switch(*cc)
567    case OP_NOTEXACTI:    case OP_NOTEXACTI:
568    case OP_NOTPOSUPTOI:    case OP_NOTPOSUPTOI:
569    cc += 2 + IMM2_SIZE;    cc += 2 + IMM2_SIZE;
570  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
571    if (common->utf && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
572  #endif  #endif
573    return cc;    return cc;
574    
# Line 1285  return MAX_255(c) ? common->fcc[c] != c Line 1287  return MAX_255(c) ? common->fcc[c] != c
1287  static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)  static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1288  {  {
1289  /* Returns with the othercase. */  /* Returns with the othercase. */
1290  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1291  if (common->utf && c > 127)  if (common->utf && c > 127)
1292    {    {
1293  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 1302  static unsigned int char_get_othercase_b Line 1304  static unsigned int char_get_othercase_b
1304  {  {
1305  /* Detects if the character and its othercase has only 1 bit difference. */  /* Detects if the character and its othercase has only 1 bit difference. */
1306  unsigned int c, oc, bit;  unsigned int c, oc, bit;
1307  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
1308  int n;  int n;
1309  #endif  #endif
1310    
1311  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1312  if (common->utf)  if (common->utf)
1313    {    {
1314    GETCHAR(c, cc);    GETCHAR(c, cc);
# Line 1324  if (common->utf) Line 1326  if (common->utf)
1326  else  else
1327    {    {
1328    c = *cc;    c = *cc;
1329    oc = common->fcc[c];    oc = TABLE_GET(c, common->fcc, c);
1330    }    }
1331  #else  #else
1332  c = *cc;  c = *cc;
1333  oc = common->fcc[c];  oc = TABLE_GET(c, common->fcc, c);
1334  #endif  #endif
1335    
1336  SLJIT_ASSERT(c != oc);  SLJIT_ASSERT(c != oc);
# Line 1342  if (c <= 127 && bit == 0x20) Line 1344  if (c <= 127 && bit == 0x20)
1344  if (!ispowerof2(bit))  if (!ispowerof2(bit))
1345    return 0;    return 0;
1346    
1347    #ifdef COMPILE_PCRE8
1348    
1349  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1350  if (common->utf && c > 127)  if (common->utf && c > 127)
1351    {    {
1352    n = PRIV(utf8_table4)[*cc & 0x3f];    n = GET_EXTRALEN(*cc);
1353    while ((bit & 0x3f) == 0)    while ((bit & 0x3f) == 0)
1354      {      {
1355      n--;      n--;
# Line 1353  if (common->utf && c > 127) Line 1357  if (common->utf && c > 127)
1357      }      }
1358    return (n << 8) | bit;    return (n << 8) | bit;
1359    }    }
1360  #endif  #endif /* SUPPORT_UTF8 */
1361  return (0 << 8) | bit;  return (0 << 8) | bit;
1362    
1363    #else /* COMPILE_PCRE8 */
1364    
1365    #ifdef COMPILE_PCRE16
1366    #ifdef SUPPORT_UTF16
1367    if (common->utf && c > 65535)
1368      {
1369      if (bit >= (1 << 10))
1370        bit >>= 10;
1371      else
1372        return (bit <= 255) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1373      }
1374    #endif /* SUPPORT_UTF16 */
1375    return (bit <= 255) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1376    #endif /* COMPILE_PCRE16 */
1377    
1378    #endif /* COMPILE_PCRE8 */
1379  }  }
1380    
1381  static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)  static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)
# Line 1368  static void read_char(compiler_common *c Line 1389  static void read_char(compiler_common *c
1389  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, updates STR_PTR.
1390  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
1391  DEFINE_COMPILER;  DEFINE_COMPILER;
1392  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1393  struct sljit_jump *jump;  struct sljit_jump *jump;
1394  #endif  #endif
1395    
1396  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1397  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1398  if (common->utf)  if (common->utf)
1399    {    {
1400    #ifdef COMPILE_PCRE8
1401    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1402    add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));  #else
1403    #ifdef COMPILE_PCRE16
1404      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1405    #endif
1406    #endif /* COMPILE_PCRE8 */
1407      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1408    JUMPHERE(jump);    JUMPHERE(jump);
1409    }    }
1410  #endif  #endif
# Line 1389  static void peek_char(compiler_common *c Line 1416  static void peek_char(compiler_common *c
1416  /* Reads the character into TMP1, keeps STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
1417  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
1418  DEFINE_COMPILER;  DEFINE_COMPILER;
1419  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1420  struct sljit_jump *jump;  struct sljit_jump *jump;
1421  #endif  #endif
1422    
1423  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1424  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1425  if (common->utf)  if (common->utf)
1426    {    {
1427    #ifdef COMPILE_PCRE8
1428    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1429    add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));  #else
1430    #ifdef COMPILE_PCRE16
1431      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1432    #endif
1433    #endif /* COMPILE_PCRE8 */
1434      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1435    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1436    JUMPHERE(jump);    JUMPHERE(jump);
1437    }    }
# Line 1409  static void read_char8_type(compiler_com Line 1442  static void read_char8_type(compiler_com
1442  {  {
1443  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1444  DEFINE_COMPILER;  DEFINE_COMPILER;
1445  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1446  struct sljit_jump *jump;  struct sljit_jump *jump;
1447  #endif  #endif
1448    
1449  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1450  if (common->utf)  if (common->utf)
1451    {    {
1452    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1453    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1454    #ifdef COMPILE_PCRE8
1455    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
1456    it is a clever early read in most cases. */    it is needed in most cases. */
1457    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1458    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1459    add_jump(compiler, &common->utf8readtype8, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1460    JUMPHERE(jump);    JUMPHERE(jump);
1461    #else
1462    #ifdef COMPILE_PCRE16
1463      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1464      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xff);
1465      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1466      JUMPHERE(jump);
1467      /* Skip low surrogate if necessary. */
1468      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1469      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1470      COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1471      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1472      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1473    #endif
1474    #endif /* COMPILE_PCRE8 */
1475    return;    return;
1476    }    }
1477  #endif  #endif
1478  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1479  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1480  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);  #ifdef COMPILE_PCRE16
1481    /* The ctypes array contains only 256 values. */
1482    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1483    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xff);
1484    #endif
1485    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1486    #ifdef COMPILE_PCRE16
1487    JUMPHERE(jump);
1488    #endif
1489  }  }
1490    
1491  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
1492  {  {
1493  /* Goes one character back. Only affects STR_PTR. Does not check begin. */  /* Goes one character back. Only affects STR_PTR. Does not check begin. */
1494  DEFINE_COMPILER;  DEFINE_COMPILER;
1495  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1496  struct sljit_label *label;  struct sljit_label *label;
1497    
1498  if (common->utf)  if (common->utf)
1499    {    {
1500    label = LABEL();    label = LABEL();
1501    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1502    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1503    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1504    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1505    return;    return;
1506    }    }
1507  #endif  #endif
1508    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1509    if (common->utf)
1510      {
1511      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1512      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1513      /* Skip low surrogate if necessary. */
1514      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1515      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1516      COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1517      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1518      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1519      return;
1520      }
1521    #endif
1522  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1523  }  }
1524    
# Line 1477  else Line 1547  else
1547    }    }
1548  }  }
1549    
1550  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1551  static void do_utf8readchar(compiler_common *common)  
1552    #ifdef COMPILE_PCRE8
1553    static void do_utfreadchar(compiler_common *common)
1554  {  {
1555  /* Fast decoding an utf8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1556  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1557  DEFINE_COMPILER;  DEFINE_COMPILER;
1558  struct sljit_jump *jump;  struct sljit_jump *jump;
# Line 1489  sljit_emit_fast_enter(compiler, RETURN_A Line 1561  sljit_emit_fast_enter(compiler, RETURN_A
1561  /* Searching for the first zero. */  /* Searching for the first zero. */
1562  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1563  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
1564  /* 2 byte sequence */  /* Two byte sequence. */
1565  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1566  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1567  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1568  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1569  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1570  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1571  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1572  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1573  JUMPHERE(jump);  JUMPHERE(jump);
1574    
1575  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1576  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
1577  /* 3 byte sequence */  /* Three byte sequence. */
1578  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1579  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1580  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1581  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1582  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1583  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1584  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1585  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 2);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1586  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1587  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1588  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 2);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1589  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1590  JUMPHERE(jump);  JUMPHERE(jump);
1591    
1592  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x08);  /* Four byte sequence. */
1593  jump = JUMP(SLJIT_C_NOT_ZERO);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
 /* 4 byte sequence */  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  
1594  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1595  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1596  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1597  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1598  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1599  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1600  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1601  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1602  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1603  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1604  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 3);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1605  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1606  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1607  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 3);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
   
 /* 5 byte sequence */  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x03);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 24);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 4);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 4);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 4);  
1608  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1609  }  }
1610    
1611  static void do_utf8readtype8(compiler_common *common)  static void do_utfreadtype8(compiler_common *common)
1612  {  {
1613  /* Fast decoding an utf8 character type. TMP2 contains the first byte  /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1614  of the character (>= 0xc0) and TMP1 is destroyed. Return value in TMP1. */  of the character (>= 0xc0). Return value in TMP1. */
1615  DEFINE_COMPILER;  DEFINE_COMPILER;
1616  struct sljit_jump *jump;  struct sljit_jump *jump;
1617  struct sljit_jump *compare;  struct sljit_jump *compare;
# Line 1573  sljit_emit_fast_enter(compiler, RETURN_A Line 1620  sljit_emit_fast_enter(compiler, RETURN_A
1620    
1621  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1622  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
1623  /* 2 byte sequence */  /* Two byte sequence. */
1624  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1625  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1626  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1627  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1628  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
# Line 1596  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); Line 1643  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1643  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1644  }  }
1645    
1646  #endif  #else /* COMPILE_PCRE8 */
1647    
1648    #ifdef COMPILE_PCRE16
1649    static void do_utfreadchar(compiler_common *common)
1650    {
1651    /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1652    of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1653    DEFINE_COMPILER;
1654    struct sljit_jump *jump;
1655    
1656    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1657    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1658    /* Do nothing, only return. */
1659    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1660    
1661    JUMPHERE(jump);
1662    /* Combine two 16 bit characters. */
1663    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1664    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1665    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1666    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1667    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1668    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1669    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1670    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1671    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1672    }
1673    #endif /* COMPILE_PCRE16 */
1674    
1675    #endif /* COMPILE_PCRE8 */
1676    
1677    #endif /* SUPPORT_UTF */
1678    
1679  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1680    
# Line 1634  struct sljit_label *newlinelabel = NULL; Line 1712  struct sljit_label *newlinelabel = NULL;
1712  struct sljit_jump *start;  struct sljit_jump *start;
1713  struct sljit_jump *end = NULL;  struct sljit_jump *end = NULL;
1714  struct sljit_jump *nl = NULL;  struct sljit_jump *nl = NULL;
1715  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1716  struct sljit_jump *singlebyte;  struct sljit_jump *singlechar;
1717  #endif  #endif
1718  jump_list *newline = NULL;  jump_list *newline = NULL;
1719  BOOL newlinecheck = FALSE;  BOOL newlinecheck = FALSE;
# Line 1708  if (newlinecheck) Line 1786  if (newlinecheck)
1786    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1787    
1788  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1789  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1790  if (common->utf)  if (common->utf)
1791    {    {
1792    singlebyte = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1793    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1794    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1795    JUMPHERE(singlebyte);    JUMPHERE(singlechar);
1796      }
1797    #endif
1798    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1799    if (common->utf)
1800      {
1801      singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1802      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1803      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1804      COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1805      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1806      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1807      JUMPHERE(singlechar);
1808    }    }
1809  #endif  #endif
1810  JUMPHERE(start);  JUMPHERE(start);
# Line 1770  else Line 1860  else
1860    }    }
1861    
1862  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1863  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1864  if (common->utf)  if (common->utf)
1865    {    {
1866    CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);    CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
# Line 1778  if (common->utf) Line 1868  if (common->utf)
1868    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1869    }    }
1870  #endif  #endif
1871    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1872    if (common->utf)
1873      {
1874      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
1875      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1876      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1877      COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1878      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1879      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1880      }
1881    #endif
1882  JUMPTO(SLJIT_JUMP, start);  JUMPTO(SLJIT_JUMP, start);
1883  JUMPHERE(found);  JUMPHERE(found);
1884  JUMPHERE(leave);  JUMPHERE(leave);
# Line 1900  if (common->utf) Line 2001  if (common->utf)
2001    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2002  #endif  #endif
2003  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2004  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2005  if (common->utf)  if (common->utf)
2006    {    {
2007    CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);    CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
# Line 1908  if (common->utf) Line 2009  if (common->utf)
2009    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2010    }    }
2011  #endif  #endif
2012    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2013    if (common->utf)
2014      {
2015      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2016      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2017      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2018      COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2019      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2020      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2021      }
2022    #endif
2023  JUMPTO(SLJIT_JUMP, start);  JUMPTO(SLJIT_JUMP, start);
2024  JUMPHERE(found);  JUMPHERE(found);
2025  JUMPHERE(leave);  JUMPHERE(leave);
# Line 2335  if (context->sourcereg == -1) Line 2447  if (context->sourcereg == -1)
2447    context->sourcereg = TMP2;    context->sourcereg = TMP2;
2448    }    }
2449    
2450  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2451  utflength = 1;  utflength = 1;
2452  if (common->utf && *cc >= 0xc0)  if (common->utf && HAS_EXTRALEN(*cc))
2453    utflength += PRIV(utf8_table4)[*cc & 0x3f];    utflength += GET_EXTRALEN(*cc);
2454    
2455  do  do
2456    {    {
# Line 2523  while (*cc != XCL_END) Line 2635  while (*cc != XCL_END)
2635    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
2636      {      {
2637      cc += 2;      cc += 2;
2638  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2639      if (common->utf && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2640  #endif  #endif
2641  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2642      needschar = TRUE;      needschar = TRUE;
# Line 2533  while (*cc != XCL_END) Line 2645  while (*cc != XCL_END)
2645    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
2646      {      {
2647      cc += 2;      cc += 2;
2648  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2649      if (common->utf && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2650  #endif  #endif
2651      cc++;      cc++;
2652  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2653      if (common->utf && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2654  #endif  #endif
2655  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2656      needschar = TRUE;      needschar = TRUE;
# Line 2875  switch(type) Line 2987  switch(type)
2987    
2988    case OP_ALLANY:    case OP_ALLANY:
2989    check_input_end(common, fallbacks);    check_input_end(common, fallbacks);
2990  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2991    if (common->utf)    if (common->utf)
2992      {      {
2993      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2994      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2995    #ifdef COMPILE_PCRE8
2996      jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);      jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2997      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2998      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2999    #else /* COMPILE_PCRE8 */
3000    #ifdef COMPILE_PCRE16
3001        jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3002        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3003        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3004        COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3005        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3006        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3007    #endif /* COMPILE_PCRE16 */
3008    #endif /* COMPILE_PCRE8 */
3009      JUMPHERE(jump[0]);      JUMPHERE(jump[0]);
3010      return cc;      return cc;
3011      }      }
3012  #endif  #endif
3013    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3014    return cc;    return cc;
3015    
3016    case OP_ANYBYTE:    case OP_ANYBYTE:
3017    check_input_end(common, fallbacks);    check_input_end(common, fallbacks);
3018    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3019    return cc;    return cc;
3020    
3021  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 3095  switch(type) Line 3218  switch(type)
3218    case OP_CHAR:    case OP_CHAR:
3219    case OP_CHARI:    case OP_CHARI:
3220    length = 1;    length = 1;
3221  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3222    if (common->utf && *cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f];    if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3223  #endif  #endif
3224    if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)    if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
3225      {      {
# Line 3129  switch(type) Line 3252  switch(type)
3252    
3253    case OP_NOT:    case OP_NOT:
3254    case OP_NOTI:    case OP_NOTI:
3255  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3256    if (common->utf)    if (common->utf)
3257      {      {
3258      length = 1;      length = 1;
3259      if (*cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f];      if (HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3260    
3261      check_input_end(common, fallbacks);      check_input_end(common, fallbacks);
3262      GETCHAR(c, cc);      GETCHAR(c, cc);
# Line 3152  switch(type) Line 3275  switch(type)
3275        /* Skip the variable-length character. */        /* Skip the variable-length character. */
3276        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
3277        jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);        jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3278    #ifdef COMPILE_PCRE8
3279        OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);        OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3280    #endif
3281        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3282        JUMPHERE(jump[0]);        JUMPHERE(jump[0]);
3283        return cc + length;        return cc + length;
# Line 3268  do Line 3393  do
3393    if (*cc == OP_CHAR)    if (*cc == OP_CHAR)
3394      {      {
3395      size = 1;      size = 1;
3396  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3397      if (common->utf && cc[1] >= 0xc0)      if (common->utf && HAS_EXTRALEN(cc[1]))
3398        size += PRIV(utf8_table4)[cc[1] & 0x3f];        size += GET_EXTRALEN(cc[1]);
3399  #endif  #endif
3400      }      }
3401    else if (*cc == OP_CHARI)    else if (*cc == OP_CHARI)
3402      {      {
3403      size = 1;      size = 1;
3404  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3405      if (common->utf)      if (common->utf)
3406        {        {
3407        if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)        if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3408          size = 0;          size = 0;
3409        else if (cc[1] >= 0xc0)        else if (HAS_EXTRALEN(cc[1]))
3410          size += PRIV(utf8_table4)[cc[1] & 0x3f];          size += GET_EXTRALEN(cc[1]);
3411        }        }
3412      else      else
3413  #endif  #endif
# Line 4786  if (*type == 0) Line 4911  if (*type == 0)
4911  if (end != NULL)  if (end != NULL)
4912    {    {
4913    *end = cc + 1;    *end = cc + 1;
4914  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4915    if (common->utf && *cc >= 0xc0) *end += PRIV(utf8_table4)[*cc & 0x3f];    if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
4916  #endif  #endif
4917    }    }
4918  return cc;  return cc;
# Line 6259  common->utf = (re->options & PCRE_UTF8) Line 6384  common->utf = (re->options & PCRE_UTF8)
6384  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6385  common->useucp = (re->options & PCRE_UCP) != 0;  common->useucp = (re->options & PCRE_UCP) != 0;
6386  #endif  #endif
6387  common->utf8readchar = NULL;  common->utfreadchar = NULL;
6388  common->utf8readtype8 = NULL;  #ifdef COMPILE_PCRE8
6389    common->utfreadtype8 = NULL;
6390  #endif  #endif
6391    #endif /* SUPPORT_UTF8 */
6392  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6393  common->getucd = NULL;  common->getucd = NULL;
6394  #endif  #endif
# Line 6487  if (common->caselesscmp != NULL) Line 6614  if (common->caselesscmp != NULL)
6614    set_jumps(common->caselesscmp, LABEL());    set_jumps(common->caselesscmp, LABEL());
6615    do_caselesscmp(common);    do_caselesscmp(common);
6616    }    }
6617  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6618  if (common->utf8readchar != NULL)  if (common->utfreadchar != NULL)
6619    {    {
6620    set_jumps(common->utf8readchar, LABEL());    set_jumps(common->utfreadchar, LABEL());
6621    do_utf8readchar(common);    do_utfreadchar(common);
6622    }    }
6623  if (common->utf8readtype8 != NULL)  #ifdef COMPILE_PCRE8
6624    if (common->utfreadtype8 != NULL)
6625    {    {
6626    set_jumps(common->utf8readtype8, LABEL());    set_jumps(common->utfreadtype8, LABEL());
6627    do_utf8readtype8(common);    do_utfreadtype8(common);
6628    }    }
6629  #endif  #endif
6630    #endif /* SUPPORT_UTF */
6631  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6632  if (common->getucd != NULL)  if (common->getucd != NULL)
6633    {    {

Legend:
Removed from v.781  
changed lines
  Added in v.782

  ViewVC Help
Powered by ViewVC 1.1.5