/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 31 by nigel, Sat Feb 24 21:38:57 2007 UTC revision 35 by nigel, Sat Feb 24 21:39:05 2007 UTC
# Line 1790  for (;;) Line 1790  for (;;)
1790      code += 2;      code += 2;
1791      break;      break;
1792    
1793        case OP_WORD_BOUNDARY:
1794        case OP_NOT_WORD_BOUNDARY:
1795        code++;
1796        break;
1797    
1798      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1799      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1800      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 1817  all of whose alternatives start with OP_ Line 1822  all of whose alternatives start with OP_
1822  it's anchored. However, if this is a multiline pattern, then only OP_SOD  it's anchored. However, if this is a multiline pattern, then only OP_SOD
1823  counts, since OP_CIRC can match in the middle.  counts, since OP_CIRC can match in the middle.
1824    
1825  A branch is also implicitly anchored if it starts with .* because that will try  A branch is also implicitly anchored if it starts with .* and DOTALL is set,
1826  the rest of the pattern at all possible matching points, so there is no point  because that will try the rest of the pattern at all possible matching points,
1827  trying them again.  so there is no point trying them again.
1828    
1829  Arguments:  Arguments:
1830    code       points to start of expression (the bracket)    code       points to start of expression (the bracket)
# Line 1837  do { Line 1842  do {
1842     register int op = *scode;     register int op = *scode;
1843     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1844       { if (!is_anchored(scode, options)) return FALSE; }       { if (!is_anchored(scode, options)) return FALSE; }
1845     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
1846                (*options & PCRE_DOTALL) != 0)
1847       { if (scode[1] != OP_ANY) return FALSE; }       { if (scode[1] != OP_ANY) return FALSE; }
1848     else if (op != OP_SOD &&     else if (op != OP_SOD &&
1849             ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))             ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
# Line 1851  return TRUE; Line 1857  return TRUE;
1857    
1858    
1859  /*************************************************  /*************************************************
1860  *     Check for start with \n line expression    *  *         Check for starting with ^ or .*        *
1861  *************************************************/  *************************************************/
1862    
1863  /* This is called for multiline expressions to try to find out if every branch  /* This is called to find out if every branch starts with ^ or .* so that
1864  starts with ^ so that "first char" processing can be done to speed things up.  "first char" processing can be done to speed things up in multiline
1865    matching and for non-DOTALL patterns that start with .* (which must start at
1866    the beginning or after \n).
1867    
1868  Argument:  points to start of expression (the bracket)  Argument:  points to start of expression (the bracket)
1869  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
# Line 1869  do { Line 1877  do {
1877     register int op = *scode;     register int op = *scode;
1878     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1879       { if (!is_startline(scode)) return FALSE; }       { if (!is_startline(scode)) return FALSE; }
1880       else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1881         { if (scode[1] != OP_ANY) return FALSE; }
1882     else if (op != OP_CIRC) return FALSE;     else if (op != OP_CIRC) return FALSE;
1883     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1884     }     }
# Line 2546  if (*errorptr != NULL) Line 2556  if (*errorptr != NULL)
2556    return NULL;    return NULL;
2557    }    }
2558    
2559  /* If the anchored option was not passed, set flag if we can determine that it  /* If the anchored option was not passed, set flag if we can determine that the
2560  is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if  pattern is anchored by virtue of ^ characters or \A or anything else (such as
2561  we can determine what the first character has to be, because that speeds up  starting with .* when DOTALL is set).
2562  unanchored matches no end. In the case of multiline matches, an alternative is  
2563  to set the PCRE_STARTLINE flag if all branches start with ^. */  Otherwise, see if we can determine what the first character has to be, because
2564    that speeds up unanchored matches no end. If not, see if we can set the
2565    PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2566    start with ^, and also when all branches start with .* for non-DOTALL matches.
2567    */
2568    
2569  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2570    {    {
# Line 4104  Arguments: Line 4118  Arguments:
4118    external_extra  points to "hints" from pcre_study() or is NULL    external_extra  points to "hints" from pcre_study() or is NULL
4119    subject         points to the subject string    subject         points to the subject string
4120    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
4121      start_offset    where to start in the subject string
4122    options         option bits    options         option bits
4123    offsets         points to a vector of ints to be filled in with offsets    offsets         points to a vector of ints to be filled in with offsets
4124    offsetcount     the number of elements in the vector    offsetcount     the number of elements in the vector
# Line 4116  Returns:          > 0 => success; value Line 4131  Returns:          > 0 => success; value
4131    
4132  int  int
4133  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
4134    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int start_offset, int options, int *offsets,
4135      int offsetcount)
4136  {  {
4137  int resetcount, ocount;  int resetcount, ocount;
4138  int first_char = -1;  int first_char = -1;
4139  int ims = 0;  int ims = 0;
4140  match_data match_block;  match_data match_block;
4141  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4142  const uschar *start_match = (const uschar *)subject;  const uschar *start_match = (const uschar *)subject + start_offset;
4143  const uschar *end_subject;  const uschar *end_subject;
4144  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
4145  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
# Line 4215  if (!anchored) Line 4231  if (!anchored)
4231          start_bits = extra->start_bits;          start_bits = extra->start_bits;
4232    }    }
4233    
4234  /* Loop for unanchored matches; for anchored regexps the loop runs just once. */  /* Loop for unanchored matches; for anchored regexes the loop runs just once. */
4235    
4236  do  do
4237    {    {

Legend:
Removed from v.31  
changed lines
  Added in v.35

  ViewVC Help
Powered by ViewVC 1.1.5