/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1668 by ph10, Thu Oct 13 16:00:48 2016 UTC | revision 1669 by ph10, Tue Oct 18 15:10:09 2016 UTC
# Line 7918  for (;; ptr++) Line 7918  for (;; ptr++)
7918          }          }
7919        }        }
7920    
7921      /* For a forward assertion, we take the reqchar, if set. This can be      /* For a forward assertion, we take the reqchar, if set, provided that the
7922      helpful if the pattern that follows the assertion doesn't set a different      group has also set a first char. This can be helpful if the pattern that
7923      char. For example, it's useful for /(?=abcde).+/. We can't set firstchar      follows the assertion doesn't set a different char. For example, it's
7924      for an assertion, however because it leads to incorrect effect for patterns      useful for /(?=abcde).+/. We can't set firstchar for an assertion, however
7925      such as /(?=a)a.+/ when the "real" "a" would then become a reqchar instead      because it leads to incorrect effect for patterns such as /(?=a)a.+/ when
7926      of a firstchar. This is overcome by a scan at the end if there's no      the "real" "a" would then become a reqchar instead of a firstchar. This is
7927      firstchar, looking for an asserted first char. */      overcome by a scan at the end if there's no firstchar, looking for an
7928        asserted first char. */
7929    
7930      else if (bravalue == OP_ASSERT && subreqcharflags >= 0)      else if (bravalue == OP_ASSERT && subreqcharflags >= 0 &&
7931                 subfirstcharflags >= 0)
7932        {        {
7933        reqchar = subreqchar;        reqchar = subreqchar;
7934        reqcharflags = subreqcharflags;        reqcharflags = subreqcharflags;
# Line 8715  matching and for non-DOTALL patterns tha Line 8717  matching and for non-DOTALL patterns tha
8717  the beginning or after \n). As in the case of is_anchored() (see above), we  the beginning or after \n). As in the case of is_anchored() (see above), we
8718  have to take account of back references to capturing brackets that contain .*  have to take account of back references to capturing brackets that contain .*
8719  because in that case we can't make the assumption. Also, the appearance of .*  because in that case we can't make the assumption. Also, the appearance of .*
8720  inside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not  inside atomic brackets or in an assertion, or in a pattern that contains *PRUNE
8721  count, because once again the assumption no longer holds.  or *SKIP does not count, because once again the assumption no longer holds.
8722    
8723  Arguments:  Arguments:
8724    code           points to start of expression (the bracket)    code           points to start of expression (the bracket)
# Line 8725  Arguments: Line 8727  Arguments:
8727                    the less precise approach                    the less precise approach
8728    cd             points to the compile data    cd             points to the compile data
8729    atomcount      atomic group level    atomcount      atomic group level
8730      inassert       TRUE if in an assertion
8731    
8732  Returns:         TRUE or FALSE  Returns:         TRUE or FALSE
8733  */  */
8734    
8735  static BOOL  static BOOL
8736  is_startline(const pcre_uchar *code, unsigned int bracket_map,  is_startline(const pcre_uchar *code, unsigned int bracket_map,
8737    compile_data *cd, int atomcount)    compile_data *cd, int atomcount, BOOL inassert)
8738  {  {
8739  do {  do {
8740     const pcre_uchar *scode = first_significant_code(     const pcre_uchar *scode = first_significant_code(
# Line 8758  do { Line 8761  do {
8761         return FALSE;         return FALSE;
8762    
8763         default:     /* Assertion */         default:     /* Assertion */
8764         if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;         if (!is_startline(scode, bracket_map, cd, atomcount, TRUE)) return FALSE;
8765         do scode += GET(scode, 1); while (*scode == OP_ALT);         do scode += GET(scode, 1); while (*scode == OP_ALT);
8766         scode += 1 + LINK_SIZE;         scode += 1 + LINK_SIZE;
8767         break;         break;
# Line 8772  do { Line 8775  do {
8775     if (op == OP_BRA  || op == OP_BRAPOS ||     if (op == OP_BRA  || op == OP_BRAPOS ||
8776         op == OP_SBRA || op == OP_SBRAPOS)         op == OP_SBRA || op == OP_SBRAPOS)
8777       {       {
8778       if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;       if (!is_startline(scode, bracket_map, cd, atomcount, inassert)) return FALSE;
8779       }       }
8780    
8781     /* Capturing brackets */     /* Capturing brackets */
# Line 8782  do { Line 8785  do {
8785       {       {
8786       int n = GET2(scode, 1+LINK_SIZE);       int n = GET2(scode, 1+LINK_SIZE);
8787       int new_map = bracket_map | ((n < 32)? (1 << n) : 1);       int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
8788       if (!is_startline(scode, new_map, cd, atomcount)) return FALSE;       if (!is_startline(scode, new_map, cd, atomcount, inassert)) return FALSE;
8789       }       }
8790    
8791     /* Positive forward assertions */     /* Positive forward assertions */
8792    
8793     else if (op == OP_ASSERT)     else if (op == OP_ASSERT)
8794       {       {
8795       if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;       if (!is_startline(scode, bracket_map, cd, atomcount, TRUE)) return FALSE;
8796       }       }
8797    
8798     /* Atomic brackets */     /* Atomic brackets */
8799    
8800     else if (op == OP_ONCE || op == OP_ONCE_NC)     else if (op == OP_ONCE || op == OP_ONCE_NC)
8801       {       {
8802       if (!is_startline(scode, bracket_map, cd, atomcount + 1)) return FALSE;       if (!is_startline(scode, bracket_map, cd, atomcount + 1, inassert)) return FALSE;
8803       }       }
8804    
8805     /* .* means "start at start or after \n" if it isn't in atomic brackets or     /* .* means "start at start or after \n" if it isn't in atomic brackets or
8806     brackets that may be referenced, as long as the pattern does not contain     brackets that may be referenced or an assertion, as long as the pattern does
8807     *PRUNE or *SKIP, because these break the feature. Consider, for example,     not contain *PRUNE or *SKIP, because these break the feature. Consider, for
8808     /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the     example, /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e.
8809     start of a line. */     not at the start of a line. */
8810    
8811     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
8812       {       {
8813       if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 ||       if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 ||
8814           atomcount > 0 || cd->had_pruneorskip)           atomcount > 0 || cd->had_pruneorskip || inassert)
8815         return FALSE;         return FALSE;
8816       }       }
8817    
# Line 9663  if ((re->options & PCRE_ANCHORED) == 0) Line 9666  if ((re->options & PCRE_ANCHORED) == 0)
9666        re->flags |= PCRE_FIRSTSET;        re->flags |= PCRE_FIRSTSET;
9667        }        }
9668    
9669      else if (is_startline(codestart, 0, cd, 0)) re->flags |= PCRE_STARTLINE;      else if (is_startline(codestart, 0, cd, 0, FALSE)) re->flags |= PCRE_STARTLINE;
9670      }      }
9671    }    }
9672    

Legend:
Removed from v.1668  
changed lines
  Added in v.1669

  ViewVC Help
Powered by ViewVC 1.1.5