/[pcre]/code/trunk/pcre_study.c
ViewVC logotype

Diff of /code/trunk/pcre_study.c

Parent Directory | Revision Log | View Patch

revision 1146 by zherczeg, Sat Oct 20 16:45:33 2012 UTC revision 1364 by ph10, Sat Oct 5 15:45:11 2013 UTC
# Line 66  string of that length that matches. In U Line 66  string of that length that matches. In U
66  rather than bytes.  rather than bytes.
67    
68  Arguments:  Arguments:
69      re              compiled pattern block
70    code            pointer to start of group (the bracket)    code            pointer to start of group (the bracket)
71    startcode       pointer to start of the whole pattern    startcode       pointer to start of the whole pattern's code
72    options         the compiling options    options         the compiling options
73    recurse_depth   RECURSE depth    recurse_depth   RECURSE depth
74    
# Line 78  Returns:   the minimum length Line 79  Returns:   the minimum length
79  */  */
80    
81  static int  static int
82  find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options,  find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
83    int recurse_depth)    const pcre_uchar *startcode, int options, int recurse_depth)
84  {  {
85  int length = -1;  int length = -1;
86  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
# Line 129  for (;;) Line 130  for (;;)
130      case OP_SBRAPOS:      case OP_SBRAPOS:
131      case OP_ONCE:      case OP_ONCE:
132      case OP_ONCE_NC:      case OP_ONCE_NC:
133      d = find_minlength(cc, startcode, options, recurse_depth);      d = find_minlength(re, cc, startcode, options, recurse_depth);
134      if (d < 0) return d;      if (d < 0) return d;
135      branchlength += d;      branchlength += d;
136      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 323  for (;;) Line 324  for (;;)
324    
325      /* Check a class for variable quantification */      /* Check a class for variable quantification */
326    
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  
     case OP_XCLASS:  
     cc += GET(cc, 1);  
     cc -= PRIV(OP_lengths)[OP_CLASS];  
     /* Fall through */  
 #endif  
   
327      case OP_CLASS:      case OP_CLASS:
328      case OP_NCLASS:      case OP_NCLASS:
329    #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
330        case OP_XCLASS:
331        /* The original code caused an unsigned overflow in 64 bit systems,
332        so now we use a conditional statement. */
333        if (op == OP_XCLASS)
334          cc += GET(cc, 1);
335        else
336          cc += PRIV(OP_lengths)[OP_CLASS];
337    #else
338      cc += PRIV(OP_lengths)[OP_CLASS];      cc += PRIV(OP_lengths)[OP_CLASS];
339    #endif
340    
341      switch (*cc)      switch (*cc)
342        {        {
# Line 371  for (;;) Line 375  for (;;)
375      If PCRE_JAVASCRIPT_COMPAT is set, a backreference to an unset bracket      If PCRE_JAVASCRIPT_COMPAT is set, a backreference to an unset bracket
376      matches an empty string (by default it causes a matching failure), so in      matches an empty string (by default it causes a matching failure), so in
377      that case we must set the minimum length to zero. */      that case we must set the minimum length to zero. */
378    
379        case OP_DNREF:     /* Duplicate named pattern back reference */
380        case OP_DNREFI:
381        if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
382          {
383          int count = GET2(cc, 1+IMM2_SIZE);
384          pcre_uchar *slot = (pcre_uchar *)re +
385            re->name_table_offset + GET2(cc, 1) * re->name_entry_size;
386          d = INT_MAX;
387          while (count-- > 0)
388            {
389            ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
390            if (cs == NULL) return -2;
391            do ce += GET(ce, 1); while (*ce == OP_ALT);
392            if (cc > cs && cc < ce)
393              {
394              d = 0;
395              had_recurse = TRUE;
396              break;
397              }
398            else
399              {
400              int dd = find_minlength(re, cs, startcode, options, recurse_depth);
401              if (dd < d) d = dd;
402              }
403            slot += re->name_entry_size;
404            }
405          }
406        else d = 0;
407        cc += 1 + 2*IMM2_SIZE;
408        goto REPEAT_BACK_REFERENCE;
409    
410      case OP_REF:      case OP_REF:      /* Single back reference */
411      case OP_REFI:      case OP_REFI:
412      if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)      if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
413        {        {
# Line 386  for (;;) Line 421  for (;;)
421          }          }
422        else        else
423          {          {
424          d = find_minlength(cs, startcode, options, recurse_depth);          d = find_minlength(re, cs, startcode, options, recurse_depth);
425          }          }
426        }        }
427      else d = 0;      else d = 0;
# Line 394  for (;;) Line 429  for (;;)
429    
430      /* Handle repeated back references */      /* Handle repeated back references */
431    
432        REPEAT_BACK_REFERENCE:
433      switch (*cc)      switch (*cc)
434        {        {
435        case OP_CRSTAR:        case OP_CRSTAR:
# Line 434  for (;;) Line 470  for (;;)
470        had_recurse = TRUE;        had_recurse = TRUE;
471      else      else
472        {        {
473        branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);        branchlength += find_minlength(re, cs, startcode, options,
474            recurse_depth + 1);
475        }        }
476      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
477      break;      break;
# Line 569  if (utf && c > 127) Line 606  if (utf && c > 127)
606    return p;    return p;
607    }    }
608  #else   /* Not SUPPORT_UTF */  #else   /* Not SUPPORT_UTF */
609  (void)(utf);   /* Stops warning for unused parameter */  (void)(utf);   /* Stops warning for unused parameter */
610  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
611    
612  /* Not UTF-8 mode, or character is not greater than 127. */  /* Not UTF-8 mode, or character is not greater than 127. */
# Line 602  if (utf && c > 127) Line 639  if (utf && c > 127)
639    return p;    return p;
640    }    }
641  #else   /* Not SUPPORT_UTF */  #else   /* Not SUPPORT_UTF */
642  (void)(utf);   /* Stops warning for unused parameter */  (void)(utf);   /* Stops warning for unused parameter */
643  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
644    
645  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
# Line 822  do Line 859  do
859        case OP_RECURSE:        case OP_RECURSE:
860        case OP_REF:        case OP_REF:
861        case OP_REFI:        case OP_REFI:
862          case OP_DNREF:
863          case OP_DNREFI:
864        case OP_REVERSE:        case OP_REVERSE:
865        case OP_RREF:        case OP_RREF:
866        case OP_SCOND:        case OP_SCOND:
# Line 1009  do Line 1048  do
1048        else        else
1049  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
1050          {          {
1051  #ifndef EBCDIC  #ifndef EBCDIC
1052          SET_BIT(0xA0);          SET_BIT(0xA0);
1053  #endif  /* Not EBCDIC */  #endif  /* Not EBCDIC */
1054  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1055          SET_BIT(0xFF);  /* For characters > 255 */          SET_BIT(0xFF);  /* For characters > 255 */
1056  #endif  /* COMPILE_PCRE[16|32] */  #endif  /* COMPILE_PCRE[16|32] */
# Line 1063  do Line 1102  do
1102        break;        break;
1103    
1104        /* The cbit_space table has vertical tab as whitespace; we have to        /* The cbit_space table has vertical tab as whitespace; we have to
1105        ensure it is set as not whitespace. Luckily, the code value is the same        ensure it is set as not whitespace. Luckily, the code value is the same
1106        (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */        (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */
1107    
1108        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
# Line 1147  do Line 1186  do
1186  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
1187  #ifndef EBCDIC  #ifndef EBCDIC
1188            SET_BIT(0xA0);            SET_BIT(0xA0);
1189  #endif  /* Not EBCDIC */  #endif  /* Not EBCDIC */
1190          break;          break;
1191    
1192          case OP_ANYNL:          case OP_ANYNL:
# Line 1180  do Line 1219  do
1219          set_type_bits(start_bits, cbit_digit, table_limit, cd);          set_type_bits(start_bits, cbit_digit, table_limit, cd);
1220          break;          break;
1221    
1222          /* The cbit_space table has vertical tab as whitespace; we have to          /* The cbit_space table has vertical tab as whitespace; we no longer
1223          ensure it gets set as not whitespace. Luckily, the code value is the          have to play fancy tricks because Perl added VT to its whitespace at
1224          same (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate          release 5.18. PCRE added it at release 8.34. */
         bit. */  
1225    
1226          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
1227          set_nottype_bits(start_bits, cbit_space, table_limit, cd);          set_nottype_bits(start_bits, cbit_space, table_limit, cd);
         start_bits[1] |= 0x08;  
1228          break;          break;
1229    
         /* The cbit_space table has vertical tab as whitespace; we have to  
         avoid setting it. Luckily, the code value is the same (0x0b) in ASCII  
         and EBCDIC, so we can just adjust the appropriate bit. */  
   
1230          case OP_WHITESPACE:          case OP_WHITESPACE:
         c = start_bits[1];    /* Save in case it was already set */  
1231          set_type_bits(start_bits, cbit_space, table_limit, cd);          set_type_bits(start_bits, cbit_space, table_limit, cd);
         start_bits[1] = (start_bits[1] & ~0x08) | c;  
1232          break;          break;
1233    
1234          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
# Line 1343  pcre_uchar *code; Line 1374  pcre_uchar *code;
1374  compile_data compile_block;  compile_data compile_block;
1375  const REAL_PCRE *re = (const REAL_PCRE *)external_re;  const REAL_PCRE *re = (const REAL_PCRE *)external_re;
1376    
1377    
1378  *errorptr = NULL;  *errorptr = NULL;
1379    
1380  if (re == NULL || re->magic_number != MAGIC_NUMBER)  if (re == NULL || re->magic_number != MAGIC_NUMBER)
# Line 1419  if ((re->options & PCRE_ANCHORED) == 0 & Line 1451  if ((re->options & PCRE_ANCHORED) == 0 &
1451    
1452  /* Find the minimum length of subject string. */  /* Find the minimum length of subject string. */
1453    
1454  switch(min = find_minlength(code, code, re->options, 0))  switch(min = find_minlength(re, code, code, re->options, 0))
1455    {    {
1456    case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;    case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
1457    case -3: *errorptr = "internal error: opcode not recognized"; return NULL;    case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
# Line 1427  switch(min = find_minlength(code, code, Line 1459  switch(min = find_minlength(code, code,
1459    }    }
1460    
1461  /* If a set of starting bytes has been identified, or if the minimum length is  /* If a set of starting bytes has been identified, or if the minimum length is
1462  greater than zero, or if JIT optimization has been requested, or if  greater than zero, or if JIT optimization has been requested, or if
1463  PCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16]_extra block and a  PCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16]_extra block and a
1464  pcre_study_data block. The study data is put in the latter, which is pointed to  pcre_study_data block. The study data is put in the latter, which is pointed to
1465  by the former, which may also get additional data set later by the calling  by the former, which may also get additional data set later by the calling
# Line 1438  becomes variable in the future, we don't Line 1470  becomes variable in the future, we don't
1470  if (bits_set || min > 0 || (options & (  if (bits_set || min > 0 || (options & (
1471  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
1472      PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |      PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |
1473      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE |      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE |
1474  #endif  #endif
1475      PCRE_STUDY_EXTRA_NEEDED)) != 0)      PCRE_STUDY_EXTRA_NEEDED)) != 0)
1476    {    {
# Line 1494  if (bits_set || min > 0 || (options & ( Line 1526  if (bits_set || min > 0 || (options & (
1526    
1527    /* If JIT support was compiled and requested, attempt the JIT compilation.    /* If JIT support was compiled and requested, attempt the JIT compilation.
1528    If no starting bytes were found, and the minimum length is zero, and JIT    If no starting bytes were found, and the minimum length is zero, and JIT
1529    compilation fails, abandon the extra block and return NULL, unless    compilation fails, abandon the extra block and return NULL, unless
1530    PCRE_STUDY_EXTRA_NEEDED is set. */    PCRE_STUDY_EXTRA_NEEDED is set. */
1531    
1532  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT

Legend:
Removed from v.1146  
changed lines
  Added in v.1364

  ViewVC Help
Powered by ViewVC 1.1.5