/[pcre]/code/trunk/pcre_study.c
ViewVC logotype

Diff of /code/trunk/pcre_study.c

Parent Directory | Revision Log | View Patch

revision 914 by zherczeg, Mon Feb 13 06:04:50 2012 UTC | revision 1055 by chpe, Tue Oct 16 15:53:30 2012 UTC
# Line 224  for (;;) Line 224  for (;;)
224      case OP_NOTPOSPLUSI:      case OP_NOTPOSPLUSI:
225      branchlength++;      branchlength++;
226      cc += 2;      cc += 2;
227  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
228      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
229  #endif  #endif
230      break;      break;
# Line 245  for (;;) Line 245  for (;;)
245      case OP_NOTEXACTI:      case OP_NOTEXACTI:
246      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
247      cc += 2 + IMM2_SIZE;      cc += 2 + IMM2_SIZE;
248  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
249      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
250  #endif  #endif
251      break;      break;
# Line 486  for (;;) Line 486  for (;;)
486      case OP_NOTPOSQUERYI:      case OP_NOTPOSQUERYI:
487    
488      cc += PRIV(OP_lengths)[op];      cc += PRIV(OP_lengths)[op];
489  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
490      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
491  #endif  #endif
492      break;      break;
# Line 538  Arguments: Line 538  Arguments:
538    p             points to the character    p             points to the character
539    caseless      the caseless flag    caseless      the caseless flag
540    cd            the block with char table pointers    cd            the block with char table pointers
541    utf           TRUE for UTF-8 / UTF-16 mode    utf           TRUE for UTF-8 / UTF-16 / UTF-32 mode
542    
543  Returns:        pointer after the character  Returns:        pointer after the character
544  */  */
# Line 564  if (utf && c > 127) Line 564  if (utf && c > 127)
564      (void)PRIV(ord2utf)(c, buff);      (void)PRIV(ord2utf)(c, buff);
565      SET_BIT(buff[0]);      SET_BIT(buff[0]);
566      }      }
567  #endif  #endif  /* Not SUPPORT_UCP */
568    return p;    return p;
569    }    }
570  #endif  #else   /* Not SUPPORT_UTF */
571    (void)(utf);   /* Stops warning for unused parameter */
572    #endif  /* SUPPORT_UTF */
573    
574  /* Not UTF-8 mode, or character is less than 127. */  /* Not UTF-8 mode, or character is less than 127. */
575    
576  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
577  return p + 1;  return p + 1;
578  #endif  #endif  /* COMPILE_PCRE8 */
579    
580  #ifdef COMPILE_PCRE16  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
581  if (c > 0xff)  if (c > 0xff)
582    {    {
583    c = 0xff;    c = 0xff;
# Line 595  if (utf && c > 127) Line 597  if (utf && c > 127)
597        c = 0xff;        c = 0xff;
598      SET_BIT(c);      SET_BIT(c);
599      }      }
600  #endif  #endif  /* SUPPORT_UCP */
601    return p;    return p;
602    }    }
603  #endif  #else   /* Not SUPPORT_UTF */
604    (void)(utf);   /* Stops warning for unused parameter */
605    #endif  /* SUPPORT_UTF */
606    
607  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
608  return p + 1;  return p + 1;
# Line 697  function fails unless the result is SSB_ Line 701  function fails unless the result is SSB_
701  Arguments:  Arguments:
702    code         points to an expression    code         points to an expression
703    start_bits   points to a 32-byte table, initialized to 0    start_bits   points to a 32-byte table, initialized to 0
704    utf          TRUE if in UTF-8 / UTF-16 mode    utf          TRUE if in UTF-8 / UTF-16 / UTF-32 mode
705    cd           the block with char table pointers    cd           the block with char table pointers
706    
707  Returns:       SSB_FAIL     => Failed to find any starting bytes  Returns:       SSB_FAIL     => Failed to find any starting bytes
# Line 986  do Line 990  do
990        identical. */        identical. */
991    
992        case OP_HSPACE:        case OP_HSPACE:
993        SET_BIT(0x09);        SET_BIT(CHAR_HT);
994        SET_BIT(0x20);        SET_BIT(CHAR_SPACE);
995  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
996        if (utf)        if (utf)
997          {          {
# Line 996  do Line 1000  do
1000          SET_BIT(0xE1);  /* For U+1680, U+180E */          SET_BIT(0xE1);  /* For U+1680, U+180E */
1001          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
1002          SET_BIT(0xE3);  /* For U+3000 */          SET_BIT(0xE3);  /* For U+3000 */
1003  #endif  #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 #ifdef COMPILE_PCRE16  
1004          SET_BIT(0xA0);          SET_BIT(0xA0);
1005          SET_BIT(0xFF);  /* For characters > 255 */          SET_BIT(0xFF);  /* For characters > 255 */
1006  #endif  #endif  /* COMPILE_PCRE[8|16|32] */
1007          }          }
1008        else        else
1009  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
1010          {          {
1011    #ifndef EBCDIC
1012          SET_BIT(0xA0);          SET_BIT(0xA0);
1013  #ifdef COMPILE_PCRE16  #endif  /* Not EBCDIC */
1014    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1015          SET_BIT(0xFF);  /* For characters > 255 */          SET_BIT(0xFF);  /* For characters > 255 */
1016  #endif  #endif  /* COMPILE_PCRE[16|32] */
1017          }          }
1018        try_next = FALSE;        try_next = FALSE;
1019        break;        break;
1020    
1021        case OP_ANYNL:        case OP_ANYNL:
1022        case OP_VSPACE:        case OP_VSPACE:
1023        SET_BIT(0x0A);        SET_BIT(CHAR_LF);
1024        SET_BIT(0x0B);        SET_BIT(CHAR_VT);
1025        SET_BIT(0x0C);        SET_BIT(CHAR_FF);
1026        SET_BIT(0x0D);        SET_BIT(CHAR_CR);
1027  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
1028        if (utf)        if (utf)
1029          {          {
1030  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
1031          SET_BIT(0xC2);  /* For U+0085 */          SET_BIT(0xC2);  /* For U+0085 */
1032          SET_BIT(0xE2);  /* For U+2028, U+2029 */          SET_BIT(0xE2);  /* For U+2028, U+2029 */
1033  #endif  #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1034  #ifdef COMPILE_PCRE16          SET_BIT(CHAR_NEL);
         SET_BIT(0x85);  
1035          SET_BIT(0xFF);  /* For characters > 255 */          SET_BIT(0xFF);  /* For characters > 255 */
1036  #endif  #endif  /* COMPILE_PCRE[8|16|32] */
1037          }          }
1038        else        else
1039  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
1040          {          {
1041          SET_BIT(0x85);          SET_BIT(CHAR_NEL);
1042  #ifdef COMPILE_PCRE16  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1043          SET_BIT(0xFF);  /* For characters > 255 */          SET_BIT(0xFF);  /* For characters > 255 */
1044  #endif  #endif
1045          }          }
# Line 1058  do Line 1062  do
1062        break;        break;
1063    
1064        /* The cbit_space table has vertical tab as whitespace; we have to        /* The cbit_space table has vertical tab as whitespace; we have to
1065        ensure it is set as not whitespace. */        ensure it is set as not whitespace. Luckily, the code value is the same
1066          (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */
1067    
1068        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
1069        set_nottype_bits(start_bits, cbit_space, table_limit, cd);        set_nottype_bits(start_bits, cbit_space, table_limit, cd);
# Line 1066  do Line 1071  do
1071        try_next = FALSE;        try_next = FALSE;
1072        break;        break;
1073    
1074        /* The cbit_space table has vertical tab as whitespace; we have to        /* The cbit_space table has vertical tab as whitespace; we have to not
1075        not set it from the table. */        set it from the table. Luckily, the code value is the same (0x0b) in
1076          ASCII and EBCDIC, so we can just adjust the appropriate bit. */
1077    
1078        case OP_WHITESPACE:        case OP_WHITESPACE:
1079        c = start_bits[1];    /* Save in case it was already set */        c = start_bits[1];    /* Save in case it was already set */
# Line 1121  do Line 1127  do
1127          return SSB_FAIL;          return SSB_FAIL;
1128    
1129          case OP_HSPACE:          case OP_HSPACE:
1130          SET_BIT(0x09);          SET_BIT(CHAR_HT);
1131          SET_BIT(0x20);          SET_BIT(CHAR_SPACE);
1132  #ifdef COMPILE_PCRE8  #ifdef SUPPORT_UTF
1133          if (utf)          if (utf)
1134            {            {
1135  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
# Line 1131  do Line 1137  do
1137            SET_BIT(0xE1);  /* For U+1680, U+180E */            SET_BIT(0xE1);  /* For U+1680, U+180E */
1138            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
1139            SET_BIT(0xE3);  /* For U+3000 */            SET_BIT(0xE3);  /* For U+3000 */
1140  #endif  #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 #ifdef COMPILE_PCRE16  
1141            SET_BIT(0xA0);            SET_BIT(0xA0);
1142            SET_BIT(0xFF);  /* For characters > 255 */            SET_BIT(0xFF);  /* For characters > 255 */
1143  #endif  #endif  /* COMPILE_PCRE[8|16|32] */
1144            }            }
1145          else          else
1146  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
1147    #ifndef EBCDIC
1148            SET_BIT(0xA0);            SET_BIT(0xA0);
1149    #endif  /* Not EBCDIC */
1150          break;          break;
1151    
1152          case OP_ANYNL:          case OP_ANYNL:
1153          case OP_VSPACE:          case OP_VSPACE:
1154          SET_BIT(0x0A);          SET_BIT(CHAR_LF);
1155          SET_BIT(0x0B);          SET_BIT(CHAR_VT);
1156          SET_BIT(0x0C);          SET_BIT(CHAR_FF);
1157          SET_BIT(0x0D);          SET_BIT(CHAR_CR);
1158  #ifdef COMPILE_PCRE8  #ifdef SUPPORT_UTF
1159          if (utf)          if (utf)
1160            {            {
1161  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
1162            SET_BIT(0xC2);  /* For U+0085 */            SET_BIT(0xC2);  /* For U+0085 */
1163            SET_BIT(0xE2);  /* For U+2028, U+2029 */            SET_BIT(0xE2);  /* For U+2028, U+2029 */
1164  #endif  #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1165  #ifdef COMPILE_PCRE16            SET_BIT(CHAR_NEL);
           SET_BIT(0x85);  
1166            SET_BIT(0xFF);  /* For characters > 255 */            SET_BIT(0xFF);  /* For characters > 255 */
1167  #endif  #endif  /* COMPILE_PCRE16 */
1168            }            }
1169          else          else
1170  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
1171            SET_BIT(0x85);            SET_BIT(CHAR_NEL);
1172          break;          break;
1173    
1174          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
# Line 1174  do Line 1180  do
1180          break;          break;
1181    
1182          /* The cbit_space table has vertical tab as whitespace; we have to          /* The cbit_space table has vertical tab as whitespace; we have to
1183          ensure it gets set as not whitespace. */          ensure it gets set as not whitespace. Luckily, the code value is the
1184            same (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate
1185            bit. */
1186    
1187          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
1188          set_nottype_bits(start_bits, cbit_space, table_limit, cd);          set_nottype_bits(start_bits, cbit_space, table_limit, cd);
# Line 1182  do Line 1190  do
1190          break;          break;
1191    
1192          /* The cbit_space table has vertical tab as whitespace; we have to          /* The cbit_space table has vertical tab as whitespace; we have to
1193          avoid setting it. */          avoid setting it. Luckily, the code value is the same (0x0b) in ASCII
1194            and EBCDIC, so we can just adjust the appropriate bit. */
1195    
1196          case OP_WHITESPACE:          case OP_WHITESPACE:
1197          c = start_bits[1];    /* Save in case it was already set */          c = start_bits[1];    /* Save in case it was already set */
# Line 1216  do Line 1225  do
1225          memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */          memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */
1226          }          }
1227  #endif  #endif
1228  #ifdef COMPILE_PCRE16  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1229        SET_BIT(0xFF);                         /* For characters > 255 */        SET_BIT(0xFF);                         /* For characters > 255 */
1230  #endif  #endif
1231        /* Fall through */        /* Fall through */
# Line 1312  Returns:    pointer to a pcre[16]_extra Line 1321  Returns:    pointer to a pcre[16]_extra
1321              NULL on error or if no optimization possible              NULL on error or if no optimization possible
1322  */  */
1323    
1324  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
1325  PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
1326  pcre_study(const pcre *external_re, int options, const char **errorptr)  pcre_study(const pcre *external_re, int options, const char **errorptr)
1327  #else  #elif defined COMPILE_PCRE16
1328  PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
1329  pcre16_study(const pcre16 *external_re, int options, const char **errorptr)  pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
1330    #elif defined COMPILE_PCRE32
1331    PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION
1332    pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
1333  #endif  #endif
1334  {  {
1335  int min;  int min;
# Line 1340  if (re == NULL || re->magic_number != MA Line 1352  if (re == NULL || re->magic_number != MA
1352    
1353  if ((re->flags & PCRE_MODE) == 0)  if ((re->flags & PCRE_MODE) == 0)
1354    {    {
1355  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
1356    *errorptr = "argument is compiled in 16 bit mode";    *errorptr = "argument not compiled in 8 bit mode";
1357  #else  #elif defined COMPILE_PCRE16
1358    *errorptr = "argument is compiled in 8 bit mode";    *errorptr = "argument not compiled in 16 bit mode";
1359    #elif defined COMPILE_PCRE32
1360      *errorptr = "argument not compiled in 32 bit mode";
1361  #endif  #endif
1362    return NULL;    return NULL;
1363    }    }
# Line 1370  if ((re->options & PCRE_ANCHORED) == 0 & Line 1384  if ((re->options & PCRE_ANCHORED) == 0 &
1384    
1385    tables = re->tables;    tables = re->tables;
1386    
1387  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
1388    if (tables == NULL)    if (tables == NULL)
1389      (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,      (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1390      (void *)(&tables));      (void *)(&tables));
1391  #else  #elif defined COMPILE_PCRE16
1392    if (tables == NULL)    if (tables == NULL)
1393      (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,      (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1394      (void *)(&tables));      (void *)(&tables));
1395    #elif defined COMPILE_PCRE32
1396      if (tables == NULL)
1397        (void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1398        (void *)(&tables));
1399  #endif  #endif
1400    
1401    compile_block.lcc = tables + lcc_offset;    compile_block.lcc = tables + lcc_offset;
# Line 1408  switch(min = find_minlength(code, code, Line 1426  switch(min = find_minlength(code, code,
1426    }    }
1427    
1428  /* If a set of starting bytes has been identified, or if the minimum length is  /* If a set of starting bytes has been identified, or if the minimum length is
1429  greater than zero, or if JIT optimization has been requested, get a  greater than zero, or if JIT optimization has been requested, or if
1430  pcre[16]_extra block and a pcre_study_data block. The study data is put in the  PCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16]_extra block and a
1431  latter, which is pointed to by the former, which may also get additional data  pcre_study_data block. The study data is put in the latter, which is pointed to
1432  set later by the calling program. At the moment, the size of pcre_study_data  by the former, which may also get additional data set later by the calling
1433  is fixed. We nevertheless save it in a field for returning via the  program. At the moment, the size of pcre_study_data is fixed. We nevertheless
1434  pcre_fullinfo() function so that if it becomes variable in the future,  save it in a field for returning via the pcre_fullinfo() function so that if it
1435  we don't have to change that code. */  becomes variable in the future, we don't have to change that code. */
1436    
1437  if (bits_set || min > 0  if (bits_set || min > 0 || (options & (
1438  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
1439      || (options & (PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE      PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |
1440                   | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)) != 0      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE |
1441  #endif  #endif
1442    )      PCRE_STUDY_EXTRA_NEEDED)) != 0)
1443    {    {
1444    extra = (PUBL(extra) *)(PUBL(malloc))    extra = (PUBL(extra) *)(PUBL(malloc))
1445      (sizeof(PUBL(extra)) + sizeof(pcre_study_data));      (sizeof(PUBL(extra)) + sizeof(pcre_study_data));
# Line 1475  if (bits_set || min > 0 Line 1493  if (bits_set || min > 0
1493    
1494    /* If JIT support was compiled and requested, attempt the JIT compilation.    /* If JIT support was compiled and requested, attempt the JIT compilation.
1495    If no starting bytes were found, and the minimum length is zero, and JIT    If no starting bytes were found, and the minimum length is zero, and JIT
1496    compilation fails, abandon the extra block and return NULL. */    compilation fails, abandon the extra block and return NULL, unless
1497      PCRE_STUDY_EXTRA_NEEDED is set. */
1498    
1499  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
1500    extra->executable_jit = NULL;    extra->executable_jit = NULL;
# Line 1486  if (bits_set || min > 0 Line 1505  if (bits_set || min > 0
1505    if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)    if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
1506      PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);      PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
1507    
1508    if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)    if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0 &&
1509          (options & PCRE_STUDY_EXTRA_NEEDED) == 0)
1510      {      {
1511  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
1512      pcre_free_study(extra);      pcre_free_study(extra);
1513  #endif  #elif defined COMPILE_PCRE16
 #ifdef COMPILE_PCRE16  
1514      pcre16_free_study(extra);      pcre16_free_study(extra);
1515    #elif defined COMPILE_PCRE32
1516        pcre32_free_study(extra);
1517  #endif  #endif
1518      extra = NULL;      extra = NULL;
1519      }      }
# Line 1513  Argument:   a pointer to the pcre[16]_ex Line 1534  Argument:   a pointer to the pcre[16]_ex
1534  Returns:    nothing  Returns:    nothing
1535  */  */
1536    
1537  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
1538  PCRE_EXP_DEFN void  PCRE_EXP_DEFN void
1539  pcre_free_study(pcre_extra *extra)  pcre_free_study(pcre_extra *extra)
1540  #else  #elif defined COMPILE_PCRE16
1541  PCRE_EXP_DEFN void  PCRE_EXP_DEFN void
1542  pcre16_free_study(pcre16_extra *extra)  pcre16_free_study(pcre16_extra *extra)
1543    #elif defined COMPILE_PCRE32
1544    PCRE_EXP_DEFN void
1545    pcre32_free_study(pcre32_extra *extra)
1546  #endif  #endif
1547  {  {
1548  if (extra == NULL)  if (extra == NULL)

Legend:
Removed from v.914  
changed lines
  Added in v.1055

  ViewVC Help
Powered by ViewVC 1.1.5