/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 576 by ph10, Sun Nov 21 18:45:10 2010 UTC revision 578 by ph10, Tue Nov 23 15:34:55 2010 UTC
# Line 1105  top-level call starts at the beginning o Line 1105  top-level call starts at the beginning o
1105  start at a parenthesis. It scans along a pattern's text looking for capturing  start at a parenthesis. It scans along a pattern's text looking for capturing
1106  subpatterns, and counting them. If it finds a named pattern that matches the  subpatterns, and counting them. If it finds a named pattern that matches the
1107  name it is given, it returns its number. Alternatively, if the name is NULL, it  name it is given, it returns its number. Alternatively, if the name is NULL, it
1108  returns when it reaches a given numbered subpattern. We know that if (?P< is  returns when it reaches a given numbered subpattern. Recursion is used to keep
1109  encountered, the name will be terminated by '>' because that is checked in the  track of subpatterns that reset the capturing group numbers - the (?| feature.
1110  first pass. Recursion is used to keep track of subpatterns that reset the  
1111  capturing group numbers - the (?| feature.  This function was originally called only from the second pass, in which we know
1112    that if (?< or (?' or (?P< is encountered, the name will be correctly
1113    terminated because that is checked in the first pass. There is now one call to
1114    this function in the first pass, to check for a recursive back reference by
1115    name (so that we can make the whole group atomic). In this case, we need check
1116    only up to the current position in the pattern, and that is still OK because
1117    any previous occurrences will have been checked. To make this work, the test
1118    for "end of pattern" is a check against cd->end_pattern in the main loop,
1119    instead of looking for a binary zero. This means that the special first-pass
1120    call can adjust cd->end_pattern temporarily. (Checks for binary zero while
1121    processing items within the loop are OK, because afterwards the main loop will
1122    terminate.)
1123    
1124  Arguments:  Arguments:
1125    ptrptr       address of the current character pointer (updated)    ptrptr       address of the current character pointer (updated)
# Line 1209  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1220  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1220    }    }
1221    
1222  /* Past any initial parenthesis handling, scan for parentheses or vertical  /* Past any initial parenthesis handling, scan for parentheses or vertical
1223  bars. */  bars. Stop if we get to cd->end_pattern. Note that this is important for the
1224    first-pass call when this value is temporarily adjusted to stop at the current
1225    position. So DO NOT change this to a test for binary zero. */
1226    
1227  for (; *ptr != 0; ptr++)  for (; ptr < cd->end_pattern; ptr++)
1228    {    {
1229    /* Skip over backslashed characters and also entire \Q...\E */    /* Skip over backslashed characters and also entire \Q...\E */
1230    
# Line 5373  for (;; ptr++) Line 5386  for (;; ptr++)
5386          while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;          while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
5387          namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
5388    
5389          /* In the pre-compile phase, do a syntax check and set a dummy          /* In the pre-compile phase, do a syntax check. We used to just set
5390          reference number. */          a dummy reference number, because it was not used in the first pass.
5391            However, with the change of recursive back references to be atomic,
5392            we have to look for the number so that this state can be identified, as
5393            otherwise the incorrect length is computed. If it's not a backwards
5394            reference, the dummy number will do. */
5395    
5396          if (lengthptr != NULL)          if (lengthptr != NULL)
5397            {            {
5398              const uschar *temp;
5399    
5400            if (namelen == 0)            if (namelen == 0)
5401              {              {
5402              *errorcodeptr = ERR62;              *errorcodeptr = ERR62;
# Line 5393  for (;; ptr++) Line 5412  for (;; ptr++)
5412              *errorcodeptr = ERR48;              *errorcodeptr = ERR48;
5413              goto FAILED;              goto FAILED;
5414              }              }
5415            recno = 0;  
5416              /* The name table does not exist in the first pass, so we cannot
5417              do a simple search as in the code below. Instead, we have to scan the
5418              pattern to find the number. It is important that we scan it only as
5419              far as we have got because the syntax of named subpatterns has not
5420              been checked for the rest of the pattern, and find_parens() assumes
5421              correct syntax. In any case, it's a waste of resources to scan
5422              further. We stop the scan at the current point by temporarily
5423              adjusting the value of cd->end_pattern. */
5424    
5425              temp = cd->end_pattern;
5426              cd->end_pattern = ptr;
5427              recno = find_parens(cd, name, namelen,
5428                (options & PCRE_EXTENDED) != 0, utf8);
5429              cd->end_pattern = temp;
5430              if (recno < 0) recno = 0;    /* Forward ref; set dummy number */
5431            }            }
5432    
5433          /* In the real compile, seek the name in the table. We check the name          /* In the real compile, seek the name in the table. We check the name

Legend:
Removed from v.576  
changed lines
  Added in v.578

  ViewVC Help
Powered by ViewVC 1.1.5