/[pcre]/code/trunk/pcre_study.c
ViewVC logotype

Diff of /code/trunk/pcre_study.c

Parent Directory | Revision Log | View Patch

revision 657 by ph10, Mon Aug 15 17:39:09 2011 UTC | revision 730 by ph10, Mon Oct 10 16:07:02 2011 UTC
# Line 66  string of that length that matches. In U Line 66  string of that length that matches. In U
66  rather than bytes.  rather than bytes.
67    
68  Arguments:  Arguments:
69    code        pointer to start of group (the bracket)    code            pointer to start of group (the bracket)
70    startcode   pointer to start of the whole pattern    startcode       pointer to start of the whole pattern
71    options     the compiling options    options         the compiling options
72    had_accept  pointer to flag for (*ACCEPT) encountered    int             RECURSE depth
   int         RECURSE depth  
73    
74  Returns:   the minimum length  Returns:   the minimum length
75             -1 if \C was encountered             -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
76             -2 internal error (missing capturing bracket)             -2 internal error (missing capturing bracket)
77             -3 internal error (opcode not listed)             -3 internal error (opcode not listed)
78  */  */
79    
80  static int  static int
81  find_minlength(const uschar *code, const uschar *startcode, int options,  find_minlength(const uschar *code, const uschar *startcode, int options,
82    BOOL *had_accept_ptr, int recurse_depth)    int recurse_depth)
83  {  {
84  int length = -1;  int length = -1;
85  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
# Line 128  for (;;) Line 127  for (;;)
127      case OP_BRAPOS:      case OP_BRAPOS:
128      case OP_SBRAPOS:      case OP_SBRAPOS:
129      case OP_ONCE:      case OP_ONCE:
130      d = find_minlength(cc, startcode, options, had_accept_ptr, recurse_depth);      case OP_ONCE_NC:
131        d = find_minlength(cc, startcode, options, recurse_depth);
132      if (d < 0) return d;      if (d < 0) return d;
133      branchlength += d;      branchlength += d;
     if (*had_accept_ptr) return branchlength;  
134      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
135      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
136      break;      break;
137    
138      /* Reached end of a branch; if it's a ket it is the end of a nested      /* ACCEPT makes things far too complicated; we have to give up. */
     call. If it's ALT it is an alternation in a nested call. If it is END it's  
     the end of the outer call. All can be handled by the same code. If it is  
     ACCEPT, it is essentially the same as END, but we set a flag so that  
     counting stops. */  
139    
140      case OP_ACCEPT:      case OP_ACCEPT:
141      case OP_ASSERT_ACCEPT:      case OP_ASSERT_ACCEPT:
142      *had_accept_ptr = TRUE;      return -1;
143      /* Fall through */  
144        /* Reached end of a branch; if it's a ket it is the end of a nested
145        call. If it's ALT it is an alternation in a nested call. If it is END it's
146        the end of the outer call. All can be handled by the same code. If an
147        ACCEPT was previously encountered, use the length that was in force at that
148        time, and pass back the shortest ACCEPT length. */
149    
150      case OP_ALT:      case OP_ALT:
151      case OP_KET:      case OP_KET:
152      case OP_KETRMAX:      case OP_KETRMAX:
# Line 379  for (;;) Line 380  for (;;)
380          }          }
381        else        else
382          {          {
383          d = find_minlength(cs, startcode, options, had_accept_ptr,          d = find_minlength(cs, startcode, options, recurse_depth);
           recurse_depth);  
         *had_accept_ptr = FALSE;  
384          }          }
385        }        }
386      else d = 0;      else d = 0;
# Line 424  for (;;) Line 423  for (;;)
423    
424      case OP_RECURSE:      case OP_RECURSE:
425      cs = ce = (uschar *)startcode + GET(cc, 1);      cs = ce = (uschar *)startcode + GET(cc, 1);
     if (cs == NULL) return -2;  
426      do ce += GET(ce, 1); while (*ce == OP_ALT);      do ce += GET(ce, 1); while (*ce == OP_ALT);
427      if ((cc > cs && cc < ce) || recurse_depth > 10)      if ((cc > cs && cc < ce) || recurse_depth > 10)
428        had_recurse = TRUE;        had_recurse = TRUE;
429      else      else
430        {        {
431        branchlength += find_minlength(cs, startcode, options, had_accept_ptr,        branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);
         recurse_depth + 1);  
       *had_accept_ptr = FALSE;  
432        }        }
433      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
434      break;      break;
# Line 495  for (;;) Line 491  for (;;)
491      case OP_MARK:      case OP_MARK:
492      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
493      case OP_SKIP_ARG:      case OP_SKIP_ARG:
     cc += _pcre_OP_lengths[op] + cc[1];  
     break;  
   
494      case OP_THEN_ARG:      case OP_THEN_ARG:
495      cc += _pcre_OP_lengths[op] + cc[1+LINK_SIZE];      cc += _pcre_OP_lengths[op] + cc[1];
496      break;      break;
497    
498      /* The remaining opcodes are just skipped over. */      /* The remaining opcodes are just skipped over. */
# Line 794  do Line 787  do
787        return SSB_FAIL;        return SSB_FAIL;
788    
789        /* We can ignore word boundary tests. */        /* We can ignore word boundary tests. */
790    
791        case OP_WORD_BOUNDARY:        case OP_WORD_BOUNDARY:
792        case OP_NOT_WORD_BOUNDARY:        case OP_NOT_WORD_BOUNDARY:
793        tcode++;        tcode++;
794        break;        break;
795    
796        /* If we hit a bracket or a positive lookahead assertion, recurse to set        /* If we hit a bracket or a positive lookahead assertion, recurse to set
797        bits from within the subpattern. If it can't find anything, we have to        bits from within the subpattern. If it can't find anything, we have to
# Line 814  do Line 807  do
807        case OP_CBRAPOS:        case OP_CBRAPOS:
808        case OP_SCBRAPOS:        case OP_SCBRAPOS:
809        case OP_ONCE:        case OP_ONCE:
810          case OP_ONCE_NC:
811        case OP_ASSERT:        case OP_ASSERT:
812        rc = set_start_bits(tcode, start_bits, utf8, cd);        rc = set_start_bits(tcode, start_bits, utf8, cd);
813        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
# Line 1228  pcre_study(const pcre *external_re, int Line 1222  pcre_study(const pcre *external_re, int
1222  {  {
1223  int min;  int min;
1224  BOOL bits_set = FALSE;  BOOL bits_set = FALSE;
 BOOL had_accept = FALSE;  
1225  uschar start_bits[32];  uschar start_bits[32];
1226  pcre_extra *extra;  pcre_extra *extra = NULL;
1227  pcre_study_data *study;  pcre_study_data *study;
1228  const uschar *tables;  const uschar *tables;
1229  uschar *code;  uschar *code;
# Line 1281  if ((re->options & PCRE_ANCHORED) == 0 & Line 1274  if ((re->options & PCRE_ANCHORED) == 0 &
1274    rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,    rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
1275      &compile_block);      &compile_block);
1276    bits_set = rc == SSB_DONE;    bits_set = rc == SSB_DONE;
1277    if (rc == SSB_UNKNOWN) *errorptr = "internal error: opcode not recognized";    if (rc == SSB_UNKNOWN)
1278        {
1279        *errorptr = "internal error: opcode not recognized";
1280        return NULL;
1281        }
1282    }    }
1283    
1284  /* Find the minimum length of subject string. */  /* Find the minimum length of subject string. */
1285    
1286  switch(min = find_minlength(code, code, re->options, &had_accept, 0))  switch(min = find_minlength(code, code, re->options, 0))
1287    {    {
1288    case -2: *errorptr = "internal error: missing capturing bracket"; break;    case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
1289    case -3: *errorptr = "internal error: opcode not recognized"; break;    case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
1290    default: break;    default: break;
1291    }    }
1292    
1293  /* Return NULL if there's been an error or if no optimization is possible. */  /* If a set of starting bytes has been identified, or if the minimum length is
1294    greater than zero, or if JIT optimization has been requested, get a pcre_extra
1295    block and a pcre_study_data block. The study data is put in the latter, which
1296    is pointed to by the former, which may also get additional data set later by
1297    the calling program. At the moment, the size of pcre_study_data is fixed. We
1298    nevertheless save it in a field for returning via the pcre_fullinfo() function
1299    so that if it becomes variable in the future, we don't have to change that
1300    code. */
1301    
1302    if (bits_set || min > 0
1303    #ifdef SUPPORT_JIT
1304        || (options & PCRE_STUDY_JIT_COMPILE) != 0
1305    #endif
1306      )
1307      {
1308      extra = (pcre_extra *)(pcre_malloc)
1309        (sizeof(pcre_extra) + sizeof(pcre_study_data));
1310      if (extra == NULL)
1311        {
1312        *errorptr = "failed to get memory";
1313        return NULL;
1314        }
1315    
1316      study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
1317      extra->flags = PCRE_EXTRA_STUDY_DATA;
1318      extra->study_data = study;
1319    
1320      study->size = sizeof(pcre_study_data);
1321      study->flags = 0;
1322    
1323  if (*errorptr != NULL || (!bits_set && min < 0)) return NULL;    if (bits_set)
1324        {
1325        study->flags |= PCRE_STUDY_MAPPED;
1326        memcpy(study->start_bits, start_bits, sizeof(start_bits));
1327        }
1328    
1329  /* Get a pcre_extra block and a pcre_study_data block. The study data is put in    /* Always set the minlength value in the block, because the JIT compiler
1330  the latter, which is pointed to by the former, which may also get additional    makes use of it. However, don't set the bit unless the length is greater than
1331  data set later by the calling program. At the moment, the size of    zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time
1332  pcre_study_data is fixed. We nevertheless save it in a field for returning via    checking the zero case. */
 the pcre_fullinfo() function so that if it becomes variable in the future, we  
 don't have to change that code. */  
1333    
1334  extra = (pcre_extra *)(pcre_malloc)    if (min > 0)
1335    (sizeof(pcre_extra) + sizeof(pcre_study_data));      {
1336        study->flags |= PCRE_STUDY_MINLEN;
1337        study->minlength = min;
1338        }
1339      else study->minlength = 0;
1340    
1341  if (extra == NULL)    /* If JIT support was compiled and requested, attempt the JIT compilation.
1342    {    If no starting bytes were found, and the minimum length is zero, and JIT
1343    *errorptr = "failed to get memory";    compilation fails, abandon the extra block and return NULL. */
1344    return NULL;  
1345    #ifdef SUPPORT_JIT
1346      extra->executable_jit = NULL;
1347      if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra);
1348      if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)
1349        {
1350        pcre_free_study(extra);
1351        extra = NULL;
1352        }
1353    #endif
1354    }    }
1355    
1356  study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));  return extra;
1357  extra->flags = PCRE_EXTRA_STUDY_DATA;  }
1358  extra->study_data = study;  
1359    
1360  study->size = sizeof(pcre_study_data);  /*************************************************
1361  study->flags = 0;  *          Free the study data                   *
1362    *************************************************/
1363    
1364  if (bits_set)  /* This function frees the memory that was obtained by pcre_study().
   {  
   study->flags |= PCRE_STUDY_MAPPED;  
   memcpy(study->start_bits, start_bits, sizeof(start_bits));  
   }  
1365    
1366  if (min >= 0)  Argument:   a pointer to the pcre_extra block
1367    {  Returns:    nothing
1368    study->flags |= PCRE_STUDY_MINLEN;  */
   study->minlength = min;  
   }  
1369    
1370  return extra;  PCRE_EXP_DEFN void
1371    pcre_free_study(pcre_extra *extra)
1372    {
1373    #ifdef SUPPORT_JIT
1374    if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
1375         extra->executable_jit != NULL)
1376      _pcre_jit_free(extra->executable_jit);
1377    #endif
1378    pcre_free(extra);
1379  }  }
1380    
1381  /* End of pcre_study.c */  /* End of pcre_study.c */

Legend:
Removed from v.657  
changed lines
  Added in v.730

  ViewVC Help
Powered by ViewVC 1.1.5