/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 578 by ph10, Tue Nov 23 15:34:55 2010 UTC revision 579 by ph10, Wed Nov 24 17:39:25 2010 UTC
# Line 408  static const char error_texts[] = Line 408  static const char error_texts[] =
408    "different names for subpatterns of the same number are not allowed\0"    "different names for subpatterns of the same number are not allowed\0"
409    "(*MARK) must have an argument\0"    "(*MARK) must have an argument\0"
410    "this version of PCRE is not compiled with PCRE_UCP support\0"    "this version of PCRE is not compiled with PCRE_UCP support\0"
411    "\\c must be followed by an ASCII character\0"    "\\c must be followed by an ASCII character\0"
412    ;    ;
413    
414  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 857  else Line 857  else
857      if (c > 127)  /* Excludes all non-ASCII in either mode */      if (c > 127)  /* Excludes all non-ASCII in either mode */
858        {        {
859        *errorcodeptr = ERR68;        *errorcodeptr = ERR68;
860        break;        break;
861        }        }
862      if (c >= CHAR_a && c <= CHAR_z) c -= 32;      if (c >= CHAR_a && c <= CHAR_z) c -= 32;
863      c ^= 0x40;      c ^= 0x40;
864  #else             /* EBCDIC coding */  #else             /* EBCDIC coding */
# Line 1113  that if (?< or (?' or (?P< is encountere Line 1113  that if (?< or (?' or (?P< is encountere
1113  terminated because that is checked in the first pass. There is now one call to  terminated because that is checked in the first pass. There is now one call to
1114  this function in the first pass, to check for a recursive back reference by  this function in the first pass, to check for a recursive back reference by
1115  name (so that we can make the whole group atomic). In this case, we need check  name (so that we can make the whole group atomic). In this case, we need check
1116  only up to the current position in the pattern, and that is still OK because  only up to the current position in the pattern, and that is still OK because
1117  any previous occurrences will have been checked. To make this work, the test  any previous occurrences will have been checked. To make this work, the test
1118  for "end of pattern" is a check against cd->end_pattern in the main loop,  for "end of pattern" is a check against cd->end_pattern in the main loop,
1119  instead of looking for a binary zero. This means that the special first-pass  instead of looking for a binary zero. This means that the special first-pass
1120  call can adjust cd->end_pattern temporarily. (Checks for binary zero while  call can adjust cd->end_pattern temporarily. (Checks for binary zero while
1121  processing items within the loop are OK, because afterwards the main loop will  processing items within the loop are OK, because afterwards the main loop will
1122  terminate.)  terminate.)
1123    
1124  Arguments:  Arguments:
# Line 1127  Arguments: Line 1127  Arguments:
1127    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1128    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1129    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1130    utf8         TRUE if we are in UTF-8 mode    utf8         TRUE if we are in UTF-8 mode
1131    count        pointer to the current capturing subpattern number (updated)    count        pointer to the current capturing subpattern number (updated)
1132    
1133  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
# Line 1220  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1220  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1220    }    }
1221    
1222  /* Past any initial parenthesis handling, scan for parentheses or vertical  /* Past any initial parenthesis handling, scan for parentheses or vertical
1223  bars. Stop if we get to cd->end_pattern. Note that this is important for the  bars. Stop if we get to cd->end_pattern. Note that this is important for the
1224  first-pass call when this value is temporarily adjusted to stop at the current  first-pass call when this value is temporarily adjusted to stop at the current
1225  position. So DO NOT change this to a test for binary zero. */  position. So DO NOT change this to a test for binary zero. */
1226    
1227  for (; ptr < cd->end_pattern; ptr++)  for (; ptr < cd->end_pattern; ptr++)
# Line 1298  for (; ptr < cd->end_pattern; ptr++) Line 1298  for (; ptr < cd->end_pattern; ptr++)
1298    
1299    if (xmode && *ptr == CHAR_NUMBER_SIGN)    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1300      {      {
1301      ptr++;      ptr++;
1302      while (*ptr != 0)      while (*ptr != 0)
1303        {        {
1304        if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }        if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
1305        ptr++;        ptr++;
1306  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1307        if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;        if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
1308  #endif  #endif
1309        }        }
# Line 1361  Arguments: Line 1361  Arguments:
1361    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1362    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1363    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1364    utf8         TRUE if we are in UTF-8 mode    utf8         TRUE if we are in UTF-8 mode
1365    
1366  Returns:       the number of the found subpattern, or -1 if not found  Returns:       the number of the found subpattern, or -1 if not found
1367  */  */
# Line 2545  if ((options & PCRE_EXTENDED) != 0) Line 2545  if ((options & PCRE_EXTENDED) != 0)
2545      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2546      if (*ptr == CHAR_NUMBER_SIGN)      if (*ptr == CHAR_NUMBER_SIGN)
2547        {        {
2548        ptr++;        ptr++;
2549        while (*ptr != 0)        while (*ptr != 0)
2550          {          {
2551          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
2552          ptr++;          ptr++;
2553  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2554          if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;          if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
2555  #endif  #endif
2556          }          }
# Line 2589  if ((options & PCRE_EXTENDED) != 0) Line 2589  if ((options & PCRE_EXTENDED) != 0)
2589      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2590      if (*ptr == CHAR_NUMBER_SIGN)      if (*ptr == CHAR_NUMBER_SIGN)
2591        {        {
2592        ptr++;        ptr++;
2593        while (*ptr != 0)        while (*ptr != 0)
2594          {          {
2595          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
2596          ptr++;          ptr++;
2597  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2598          if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;          if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
2599  #endif  #endif
2600          }          }
# Line 3170  for (;; ptr++) Line 3170  for (;; ptr++)
3170      if ((cd->ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
3171      if (c == CHAR_NUMBER_SIGN)      if (c == CHAR_NUMBER_SIGN)
3172        {        {
3173        ptr++;        ptr++;
3174        while (*ptr != 0)        while (*ptr != 0)
3175          {          {
3176          if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
3177          ptr++;          ptr++;
3178  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3179          if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;          if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
3180  #endif  #endif
3181          }          }
# Line 3553  for (;; ptr++) Line 3553  for (;; ptr++)
3553              continue;              continue;
3554    
3555              /* Perl 5.004 onwards omits VT from \s, but we must preserve it              /* Perl 5.004 onwards omits VT from \s, but we must preserve it
3556              if it was previously set by something earlier in the character              if it was previously set by something earlier in the character
3557              class. */              class. */
3558    
3559              case ESC_s:              case ESC_s:
3560              classbits[0] |= cbits[cbit_space];              classbits[0] |= cbits[cbit_space];
3561              classbits[1] |= cbits[cbit_space+1] & ~0x08;              classbits[1] |= cbits[cbit_space+1] & ~0x08;
3562              for (c = 2; c < 32; c++) classbits[c] |= cbits[c+cbit_space];              for (c = 2; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
3563              continue;              continue;
3564    
# Line 4875  for (;; ptr++) Line 4875  for (;; ptr++)
4875              if (*code++ == OP_THEN)              if (*code++ == OP_THEN)
4876                {                {
4877                PUT(code, 0, code - bcptr->current_branch - 1);                PUT(code, 0, code - bcptr->current_branch - 1);
4878                code += LINK_SIZE;                code += LINK_SIZE;
4879                }                }
4880              }              }
4881    
4882            else            else
# Line 4890  for (;; ptr++) Line 4890  for (;; ptr++)
4890              if (*code++ == OP_THEN_ARG)              if (*code++ == OP_THEN_ARG)
4891                {                {
4892                PUT(code, 0, code - bcptr->current_branch - 1);                PUT(code, 0, code - bcptr->current_branch - 1);
4893                code += LINK_SIZE;                code += LINK_SIZE;
4894                }                }
4895              *code++ = arglen;              *code++ = arglen;
4896              memcpy(code, arg, arglen);              memcpy(code, arg, arglen);
4897              code += arglen;              code += arglen;
# Line 5395  for (;; ptr++) Line 5395  for (;; ptr++)
5395    
5396          if (lengthptr != NULL)          if (lengthptr != NULL)
5397            {            {
5398            const uschar *temp;            const uschar *temp;
5399    
5400            if (namelen == 0)            if (namelen == 0)
5401              {              {
5402              *errorcodeptr = ERR62;              *errorcodeptr = ERR62;
# Line 5412  for (;; ptr++) Line 5412  for (;; ptr++)
5412              *errorcodeptr = ERR48;              *errorcodeptr = ERR48;
5413              goto FAILED;              goto FAILED;
5414              }              }
5415    
5416            /* The name table does not exist in the first pass, so we cannot            /* The name table does not exist in the first pass, so we cannot
5417            do a simple search as in the code below. Instead, we have to scan the            do a simple search as in the code below. Instead, we have to scan the
5418            pattern to find the number. It is important that we scan it only as            pattern to find the number. It is important that we scan it only as
5419            far as we have got because the syntax of named subpatterns has not            far as we have got because the syntax of named subpatterns has not
5420            been checked for the rest of the pattern, and find_parens() assumes            been checked for the rest of the pattern, and find_parens() assumes
5421            correct syntax. In any case, it's a waste of resources to scan            correct syntax. In any case, it's a waste of resources to scan
5422            further. We stop the scan at the current point by temporarily            further. We stop the scan at the current point by temporarily
5423            adjusting the value of cd->end_pattern. */            adjusting the value of cd->end_pattern. */
5424    
5425            temp = cd->end_pattern;            temp = cd->end_pattern;
5426            cd->end_pattern = ptr;            cd->end_pattern = ptr;
5427            recno = find_parens(cd, name, namelen,            recno = find_parens(cd, name, namelen,
5428              (options & PCRE_EXTENDED) != 0, utf8);              (options & PCRE_EXTENDED) != 0, utf8);
5429            cd->end_pattern = temp;            cd->end_pattern = temp;
5430            if (recno < 0) recno = 0;    /* Forward ref; set dummy number */            if (recno < 0) recno = 0;    /* Forward ref; set dummy number */
5431            }            }
5432    

Legend:
Removed from v.578  
changed lines
  Added in v.579

  ViewVC Help
Powered by ViewVC 1.1.5