/[pcre]/code/trunk/pcre_study.c
ViewVC logotype

Diff of /code/trunk/pcre_study.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 657 by ph10, Mon Aug 15 17:39:09 2011 UTC revision 723 by ph10, Sat Oct 8 15:55:23 2011 UTC
# Line 66  string of that length that matches. In U Line 66  string of that length that matches. In U
66  rather than bytes.  rather than bytes.
67    
68  Arguments:  Arguments:
69    code        pointer to start of group (the bracket)    code            pointer to start of group (the bracket)
70    startcode   pointer to start of the whole pattern    startcode       pointer to start of the whole pattern
71    options     the compiling options    options         the compiling options
72    had_accept  pointer to flag for (*ACCEPT) encountered    recurse_depth   RECURSE depth
   int         RECURSE depth  
73    
74  Returns:   the minimum length  Returns:   the minimum length
75             -1 if \C was encountered             -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
76             -2 internal error (missing capturing bracket)             -2 internal error (missing capturing bracket)
77             -3 internal error (opcode not listed)             -3 internal error (opcode not listed)
78  */  */
79    
80  static int  static int
81  find_minlength(const uschar *code, const uschar *startcode, int options,  find_minlength(const uschar *code, const uschar *startcode, int options,
82    BOOL *had_accept_ptr, int recurse_depth)    int recurse_depth)
83  {  {
84  int length = -1;  int length = -1;
85  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
# Line 128  for (;;) Line 127  for (;;)
127      case OP_BRAPOS:      case OP_BRAPOS:
128      case OP_SBRAPOS:      case OP_SBRAPOS:
129      case OP_ONCE:      case OP_ONCE:
130      d = find_minlength(cc, startcode, options, had_accept_ptr, recurse_depth);      case OP_ONCE_NC:
131        d = find_minlength(cc, startcode, options, recurse_depth);
132      if (d < 0) return d;      if (d < 0) return d;
133      branchlength += d;      branchlength += d;
     if (*had_accept_ptr) return branchlength;  
134      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
135      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
136      break;      break;
137    
138      /* Reached end of a branch; if it's a ket it is the end of a nested      /* ACCEPT makes things far too complicated; we have to give up. */
     call. If it's ALT it is an alternation in a nested call. If it is END it's  
     the end of the outer call. All can be handled by the same code. If it is  
     ACCEPT, it is essentially the same as END, but we set a flag so that  
     counting stops. */  
139    
140      case OP_ACCEPT:      case OP_ACCEPT:
141      case OP_ASSERT_ACCEPT:      case OP_ASSERT_ACCEPT:
142      *had_accept_ptr = TRUE;      return -1;
143      /* Fall through */  
144        /* Reached end of a branch; if it's a ket it is the end of a nested
145        call. If it's ALT it is an alternation in a nested call. If it is END it's
146        the end of the outer call. All can be handled by the same code. If an
147        ACCEPT was previously encountered, use the length that was in force at that
148        time, and pass back the shortest ACCEPT length. */
149    
150      case OP_ALT:      case OP_ALT:
151      case OP_KET:      case OP_KET:
152      case OP_KETRMAX:      case OP_KETRMAX:
# Line 379  for (;;) Line 380  for (;;)
380          }          }
381        else        else
382          {          {
383          d = find_minlength(cs, startcode, options, had_accept_ptr,          d = find_minlength(cs, startcode, options, recurse_depth);
           recurse_depth);  
         *had_accept_ptr = FALSE;  
384          }          }
385        }        }
386      else d = 0;      else d = 0;
# Line 430  for (;;) Line 429  for (;;)
429        had_recurse = TRUE;        had_recurse = TRUE;
430      else      else
431        {        {
432        branchlength += find_minlength(cs, startcode, options, had_accept_ptr,        branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);
         recurse_depth + 1);  
       *had_accept_ptr = FALSE;  
433        }        }
434      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
435      break;      break;
# Line 794  do Line 791  do
791        return SSB_FAIL;        return SSB_FAIL;
792    
793        /* We can ignore word boundary tests. */        /* We can ignore word boundary tests. */
794    
795        case OP_WORD_BOUNDARY:        case OP_WORD_BOUNDARY:
796        case OP_NOT_WORD_BOUNDARY:        case OP_NOT_WORD_BOUNDARY:
797        tcode++;        tcode++;
798        break;        break;
799    
800        /* If we hit a bracket or a positive lookahead assertion, recurse to set        /* If we hit a bracket or a positive lookahead assertion, recurse to set
801        bits from within the subpattern. If it can't find anything, we have to        bits from within the subpattern. If it can't find anything, we have to
# Line 814  do Line 811  do
811        case OP_CBRAPOS:        case OP_CBRAPOS:
812        case OP_SCBRAPOS:        case OP_SCBRAPOS:
813        case OP_ONCE:        case OP_ONCE:
814          case OP_ONCE_NC:
815        case OP_ASSERT:        case OP_ASSERT:
816        rc = set_start_bits(tcode, start_bits, utf8, cd);        rc = set_start_bits(tcode, start_bits, utf8, cd);
817        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
# Line 1228  pcre_study(const pcre *external_re, int Line 1226  pcre_study(const pcre *external_re, int
1226  {  {
1227  int min;  int min;
1228  BOOL bits_set = FALSE;  BOOL bits_set = FALSE;
 BOOL had_accept = FALSE;  
1229  uschar start_bits[32];  uschar start_bits[32];
1230  pcre_extra *extra;  pcre_extra *extra = NULL;
1231  pcre_study_data *study;  pcre_study_data *study;
1232  const uschar *tables;  const uschar *tables;
1233  uschar *code;  uschar *code;
# Line 1281  if ((re->options & PCRE_ANCHORED) == 0 & Line 1278  if ((re->options & PCRE_ANCHORED) == 0 &
1278    rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,    rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
1279      &compile_block);      &compile_block);
1280    bits_set = rc == SSB_DONE;    bits_set = rc == SSB_DONE;
1281    if (rc == SSB_UNKNOWN) *errorptr = "internal error: opcode not recognized";    if (rc == SSB_UNKNOWN)
1282        {
1283        *errorptr = "internal error: opcode not recognized";
1284        return NULL;
1285        }
1286    }    }
1287    
1288  /* Find the minimum length of subject string. */  /* Find the minimum length of subject string. */
1289    
1290  switch(min = find_minlength(code, code, re->options, &had_accept, 0))  switch(min = find_minlength(code, code, re->options, 0))
1291    {    {
1292    case -2: *errorptr = "internal error: missing capturing bracket"; break;    case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
1293    case -3: *errorptr = "internal error: opcode not recognized"; break;    case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
1294    default: break;    default: break;
1295    }    }
1296    
1297  /* Return NULL if there's been an error or if no optimization is possible. */  /* If a set of starting bytes has been identified, or if the minimum length is
1298    greater than zero, or if JIT optimization has been requested, get a pcre_extra
1299    block and a pcre_study_data block. The study data is put in the latter, which
1300    is pointed to by the former, which may also get additional data set later by
1301    the calling program. At the moment, the size of pcre_study_data is fixed. We
1302    nevertheless save it in a field for returning via the pcre_fullinfo() function
1303    so that if it becomes variable in the future, we don't have to change that
1304    code. */
1305    
1306    if (bits_set || min > 0
1307    #ifdef SUPPORT_JIT
1308        || (options & PCRE_STUDY_JIT_COMPILE) != 0
1309    #endif
1310      )
1311      {
1312      extra = (pcre_extra *)(pcre_malloc)
1313        (sizeof(pcre_extra) + sizeof(pcre_study_data));
1314      if (extra == NULL)
1315        {
1316        *errorptr = "failed to get memory";
1317        return NULL;
1318        }
1319    
1320  if (*errorptr != NULL || (!bits_set && min < 0)) return NULL;    study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
1321      extra->flags = PCRE_EXTRA_STUDY_DATA;
1322      extra->study_data = study;
1323    
1324  /* Get a pcre_extra block and a pcre_study_data block. The study data is put in    study->size = sizeof(pcre_study_data);
1325  the latter, which is pointed to by the former, which may also get additional    study->flags = 0;
 data set later by the calling program. At the moment, the size of  
 pcre_study_data is fixed. We nevertheless save it in a field for returning via  
 the pcre_fullinfo() function so that if it becomes variable in the future, we  
 don't have to change that code. */  
1326    
1327  extra = (pcre_extra *)(pcre_malloc)    if (bits_set)
1328    (sizeof(pcre_extra) + sizeof(pcre_study_data));      {
1329        study->flags |= PCRE_STUDY_MAPPED;
1330        memcpy(study->start_bits, start_bits, sizeof(start_bits));
1331        }
1332    
1333  if (extra == NULL)    /* Always set the minlength value in the block, because the JIT compiler
1334    {    makes use of it. However, don't set the bit unless the length is greater than
1335    *errorptr = "failed to get memory";    zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time
1336    return NULL;    checking the zero case. */
1337    
1338      if (min > 0)
1339        {
1340        study->flags |= PCRE_STUDY_MINLEN;
1341        study->minlength = min;
1342        }
1343      else study->minlength = 0;
1344    
1345      /* If JIT support was compiled and requested, attempt the JIT compilation.
1346      If no starting bytes were found, and the minimum length is zero, and JIT
1347      compilation fails, abandon the extra block and return NULL. */
1348    
1349    #ifdef SUPPORT_JIT
1350      extra->executable_jit = NULL;
1351      if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra);
1352      if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)
1353        {
1354        pcre_free_study(extra);
1355        extra = NULL;
1356        }
1357    #endif
1358    }    }
1359    
1360  study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));  return extra;
1361  extra->flags = PCRE_EXTRA_STUDY_DATA;  }
 extra->study_data = study;  
1362    
 study->size = sizeof(pcre_study_data);  
 study->flags = 0;  
1363    
1364  if (bits_set)  /*************************************************
1365    {  *          Free the study data                   *
1366    study->flags |= PCRE_STUDY_MAPPED;  *************************************************/
   memcpy(study->start_bits, start_bits, sizeof(start_bits));  
   }  
1367    
1368  if (min >= 0)  /* This function frees the memory that was obtained by pcre_study().
   {  
   study->flags |= PCRE_STUDY_MINLEN;  
   study->minlength = min;  
   }  
1369    
1370  return extra;  Argument:   a pointer to the pcre_extra block
1371    Returns:    nothing
1372    */
1373    
1374    PCRE_EXP_DEFN void
1375    pcre_free_study(pcre_extra *extra)
1376    {
1377    #ifdef SUPPORT_JIT
1378    if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
1379         extra->executable_jit != NULL)
1380      _pcre_jit_free(extra->executable_jit);
1381    #endif
1382    pcre_free(extra);
1383  }  }
1384    
1385  /* End of pcre_study.c */  /* End of pcre_study.c */

Legend:
Removed from v.657  
changed lines
  Added in v.723

  ViewVC Help
Powered by ViewVC 1.1.5