/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 408 by ph10, Fri Mar 27 17:52:03 2009 UTC revision 412 by ph10, Sat Apr 11 10:34:37 2009 UTC
# Line 1019  subpatterns, and counting them. If it fi Line 1019  subpatterns, and counting them. If it fi
1019  name it is given, it returns its number. Alternatively, if the name is NULL, it  name it is given, it returns its number. Alternatively, if the name is NULL, it
1020  returns when it reaches a given numbered subpattern. We know that if (?P< is  returns when it reaches a given numbered subpattern. We know that if (?P< is
1021  encountered, the name will be terminated by '>' because that is checked in the  encountered, the name will be terminated by '>' because that is checked in the
1022  first pass. Recursion is used to keep track of subpatterns that reset the  first pass. Recursion is used to keep track of subpatterns that reset the
1023  capturing group numbers - the (?| feature.  capturing group numbers - the (?| feature.
1024    
1025  Arguments:  Arguments:
# Line 1028  Arguments: Line 1028  Arguments:
1028    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1029    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1030    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1031    count        pointer to the current capturing subpattern number (updated)    count        pointer to the current capturing subpattern number (updated)
1032    
1033  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
1034  */  */
# Line 1042  int start_count = *count; Line 1042  int start_count = *count;
1042  int hwm_count = start_count;  int hwm_count = start_count;
1043  BOOL dup_parens = FALSE;  BOOL dup_parens = FALSE;
1044    
1045  /* If the first character is a parenthesis, check on the type of group we are  /* If the first character is a parenthesis, check on the type of group we are
1046  dealing with. The very first call may not start with a parenthesis. */  dealing with. The very first call may not start with a parenthesis. */
1047    
1048  if (ptr[0] == CHAR_LEFT_PARENTHESIS)  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1049    {    {
1050    if (ptr[1] == CHAR_QUESTION_MARK &&    if (ptr[1] == CHAR_QUESTION_MARK &&
1051        ptr[2] == CHAR_VERTICAL_LINE)        ptr[2] == CHAR_VERTICAL_LINE)
1052      {      {
1053      ptr += 3;      ptr += 3;
1054      dup_parens = TRUE;      dup_parens = TRUE;
1055      }      }
1056    
1057    /* Handle a normal, unnamed capturing parenthesis */    /* Handle a normal, unnamed capturing parenthesis */
1058    
1059    else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)    else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
1060      {      {
1061      *count += 1;      *count += 1;
1062      if (name == NULL && *count == lorn) return *count;      if (name == NULL && *count == lorn) return *count;
1063      ptr++;      ptr++;
1064      }      }
1065    
1066    /* Handle a condition. If it is an assertion, just carry on so that it    /* Handle a condition. If it is an assertion, just carry on so that it
1067    is processed as normal. If not, skip to the closing parenthesis of the    is processed as normal. If not, skip to the closing parenthesis of the
1068    condition (there can't be any nested parens). */    condition (there can't be any nested parens). */
1069    
1070    else if (ptr[2] == CHAR_LEFT_PARENTHESIS)    else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
1071      {      {
1072      ptr += 2;      ptr += 2;
1073      if (ptr[1] != CHAR_QUESTION_MARK)      if (ptr[1] != CHAR_QUESTION_MARK)
1074        {        {
1075        while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;        while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1076        if (*ptr != 0) ptr++;        if (*ptr != 0) ptr++;
1077        }        }
1078      }      }
1079    
1080    /* We have either (? or (* and not a condition */    /* We have either (? or (* and not a condition */
1081    
1082    else    else
1083      {      {
1084      ptr += 2;      ptr += 2;
1085      if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */      if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */
1086    
1087      /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */      /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
1088    
1089      if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&      if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1090          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1091        {        {
# Line 1100  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1100  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1100        if (name != NULL && lorn == ptr - thisname &&        if (name != NULL && lorn == ptr - thisname &&
1101            strncmp((const char *)name, (const char *)thisname, lorn) == 0)            strncmp((const char *)name, (const char *)thisname, lorn) == 0)
1102          return *count;          return *count;
1103        }        }
1104      }      }
1105    }    }
1106    
1107  /* Past any initial parenthesis handling, scan for parentheses or vertical  /* Past any initial parenthesis handling, scan for parentheses or vertical
1108  bars. */  bars. */
1109    
1110  for (; *ptr != 0; ptr++)  for (; *ptr != 0; ptr++)
# Line 1185  for (; *ptr != 0; ptr++) Line 1185  for (; *ptr != 0; ptr++)
1185      }      }
1186    
1187    /* Check for the special metacharacters */    /* Check for the special metacharacters */
1188    
1189    if (*ptr == CHAR_LEFT_PARENTHESIS)    if (*ptr == CHAR_LEFT_PARENTHESIS)
1190      {      {
1191      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
1192      if (rc > 0) return rc;      if (rc > 0) return rc;
1193      if (*ptr == 0) goto FAIL_EXIT;      if (*ptr == 0) goto FAIL_EXIT;
1194      }      }
1195    
1196    else if (*ptr == CHAR_RIGHT_PARENTHESIS)    else if (*ptr == CHAR_RIGHT_PARENTHESIS)
1197      {      {
1198      if (dup_parens && *count < hwm_count) *count = hwm_count;      if (dup_parens && *count < hwm_count) *count = hwm_count;
1199      *ptrptr = ptr;      *ptrptr = ptr;
1200      return -1;      return -1;
1201      }      }
1202    
1203    else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)    else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
1204      {      {
1205      if (*count > hwm_count) hwm_count = *count;      if (*count > hwm_count) hwm_count = *count;
1206      *count = start_count;      *count = start_count;
1207      }      }
1208    }    }
1209    
1210  FAIL_EXIT:  FAIL_EXIT:
# Line 1251  to find_parens_sub() will scan right to Line 1251  to find_parens_sub() will scan right to
1251  does start with a parenthesis, find_parens_sub() will return when it hits the  does start with a parenthesis, find_parens_sub() will return when it hits the
1252  matching closing parens. That is why we have to have a loop. */  matching closing parens. That is why we have to have a loop. */
1253    
1254  for (;;)  for (;;)
1255    {    {
1256    rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);    rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
1257    if (rc > 0 || *ptr++ == 0) break;    if (rc > 0 || *ptr++ == 0) break;
1258    }    }
1259    
1260  return rc;  return rc;
1261  }  }
1262    
# Line 6226  if (erroroffset == NULL) Line 6226  if (erroroffset == NULL)
6226    
6227  *erroroffset = 0;  *erroroffset = 0;
6228    
 /* Can't support UTF8 unless PCRE has been compiled to include the code. */  
   
 #ifdef SUPPORT_UTF8  
 utf8 = (options & PCRE_UTF8) != 0;  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  
      (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)  
   {  
   errorcode = ERR44;  
   goto PCRE_EARLY_ERROR_RETURN2;  
   }  
 #else  
 if ((options & PCRE_UTF8) != 0)  
   {  
   errorcode = ERR32;  
   goto PCRE_EARLY_ERROR_RETURN;  
   }  
 #endif  
   
 if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)  
   {  
   errorcode = ERR17;  
   goto PCRE_EARLY_ERROR_RETURN;  
   }  
   
6229  /* Set up pointers to the individual character tables */  /* Set up pointers to the individual character tables */
6230    
6231  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = _pcre_default_tables;
# Line 6258  cd->fcc = tables + fcc_offset; Line 6234  cd->fcc = tables + fcc_offset;
6234  cd->cbits = tables + cbits_offset;  cd->cbits = tables + cbits_offset;
6235  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
6236    
6237    /* Check that all undefined public option bits are zero */
6238    
6239    if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
6240      {
6241      errorcode = ERR17;
6242      goto PCRE_EARLY_ERROR_RETURN;
6243      }
6244    
6245  /* Check for global one-time settings at the start of the pattern, and remember  /* Check for global one-time settings at the start of the pattern, and remember
6246  the offset for later. */  the offset for later. */
6247    
# Line 6267  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 6251  while (ptr[skipatstart] == CHAR_LEFT_PAR
6251    int newnl = 0;    int newnl = 0;
6252    int newbsr = 0;    int newbsr = 0;
6253    
6254      if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
6255        { skipatstart += 7; options |= PCRE_UTF8; continue; }
6256    
6257    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
6258      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
6259    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)
# Line 6290  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 6277  while (ptr[skipatstart] == CHAR_LEFT_PAR
6277    else break;    else break;
6278    }    }
6279    
6280    /* Can't support UTF8 unless PCRE has been compiled to include the code. */
6281    
6282    #ifdef SUPPORT_UTF8
6283    utf8 = (options & PCRE_UTF8) != 0;
6284    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
6285         (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
6286      {
6287      errorcode = ERR44;
6288      goto PCRE_EARLY_ERROR_RETURN2;
6289      }
6290    #else
6291    if ((options & PCRE_UTF8) != 0)
6292      {
6293      errorcode = ERR32;
6294      goto PCRE_EARLY_ERROR_RETURN;
6295      }
6296    #endif
6297    
6298  /* Check validity of \R options. */  /* Check validity of \R options. */
6299    
6300  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))

Legend:
Removed from v.408  
changed lines
  Added in v.412

  ViewVC Help
Powered by ViewVC 1.1.5