/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 391 by ph10, Tue Mar 17 21:16:01 2009 UTC revision 408 by ph10, Fri Mar 27 17:52:03 2009 UTC
# Line 100  is invalid. */ Line 100  is invalid. */
100  #ifndef EBCDIC  #ifndef EBCDIC
101    
102  /* This is the "normal" table for ASCII systems or for EBCDIC systems running  /* This is the "normal" table for ASCII systems or for EBCDIC systems running
103  in UTF-8 mode. */  in UTF-8 mode. */
104    
105  static const short int escapes[] = {  static const short int escapes[] = {
106       0,                       0,       0,                       0,
107         0,                       0,
108         0,                       0,
109       0,                       0,       0,                       0,
      0,                       0,  
110       0,                       0,       0,                       0,
      0,                       0,  
111       CHAR_COLON,              CHAR_SEMICOLON,       CHAR_COLON,              CHAR_SEMICOLON,
112       CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,       CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,
113       CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,       CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,
114       CHAR_COMMERCIAL_AT,      -ESC_A,       CHAR_COMMERCIAL_AT,      -ESC_A,
115       -ESC_B,                  -ESC_C,       -ESC_B,                  -ESC_C,
116       -ESC_D,                  -ESC_E,       -ESC_D,                  -ESC_E,
117       0,                       -ESC_G,       0,                       -ESC_G,
118       -ESC_H,                  0,       -ESC_H,                  0,
119       0,                       -ESC_K,       0,                       -ESC_K,
120       0,                       0,       0,                       0,
121       0,                       0,       0,                       0,
122       -ESC_P,                  -ESC_Q,       -ESC_P,                  -ESC_Q,
123       -ESC_R,                  -ESC_S,       -ESC_R,                  -ESC_S,
124       0,                       0,       0,                       0,
125       -ESC_V,                  -ESC_W,       -ESC_V,                  -ESC_W,
126       -ESC_X,                  0,       -ESC_X,                  0,
127       -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,       -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
128       CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,       CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
129       CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,       CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
130       CHAR_GRAVE_ACCENT,       7,       CHAR_GRAVE_ACCENT,       7,
131       -ESC_b,                  0,       -ESC_b,                  0,
132       -ESC_d,                  ESC_e,       -ESC_d,                  ESC_e,
133       ESC_f,                   0,       ESC_f,                   0,
134       -ESC_h,                  0,       -ESC_h,                  0,
135       0,                       -ESC_k,       0,                       -ESC_k,
136       0,                       0,       0,                       0,
137       ESC_n,                   0,       ESC_n,                   0,
138       -ESC_p,                  0,       -ESC_p,                  0,
139       ESC_r,                   -ESC_s,       ESC_r,                   -ESC_s,
140       ESC_tee,                 0,       ESC_tee,                 0,
141       -ESC_v,                  -ESC_w,       -ESC_v,                  -ESC_w,
142       0,                       0,       0,                       0,
143       -ESC_z       -ESC_z
144  };  };
145    
146  #else  #else
147    
148  /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */  /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
149    
# Line 177  static const short int escapes[] = { Line 177  static const short int escapes[] = {
177    
178  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is
179  searched linearly. Put all the names into a single string, in order to reduce  searched linearly. Put all the names into a single string, in order to reduce
180  the number of relocations when a shared library is dynamically linked. The  the number of relocations when a shared library is dynamically linked. The
181  string is built from string macros so that it works in UTF-8 mode on EBCDIC  string is built from string macros so that it works in UTF-8 mode on EBCDIC
182  platforms. */  platforms. */
183    
184  typedef struct verbitem {  typedef struct verbitem {
# Line 215  length entry. The first three must be al Line 215  length entry. The first three must be al
215  for handling case independence. */  for handling case independence. */
216    
217  static const char posix_names[] =  static const char posix_names[] =
218    STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0    STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
219    STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0    STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
220    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
221    STRING_word0  STRING_xdigit;    STRING_word0  STRING_xdigit;
222    
# Line 360  For convenience, we use the same bit def Line 360  For convenience, we use the same bit def
360    
361  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
362    
363  #ifndef EBCDIC  #ifndef EBCDIC
364    
365  /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in  /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
366  UTF-8 mode. */  UTF-8 mode. */
367    
368  static const unsigned char digitab[] =  static const unsigned char digitab[] =
# Line 400  static const unsigned char digitab[] = Line 400  static const unsigned char digitab[] =
400    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
401    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
402    
403  #else  #else
404    
405  /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */  /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
406    
# Line 1009  return p; Line 1009  return p;
1009    
1010    
1011  /*************************************************  /*************************************************
1012  *       Find forward referenced subpattern       *  *  Subroutine for finding forward reference      *
1013  *************************************************/  *************************************************/
1014    
1015  /* This function scans along a pattern's text looking for capturing  /* This recursive function is called only from find_parens() below. The
1016    top-level call starts at the beginning of the pattern. All other calls must
1017    start at a parenthesis. It scans along a pattern's text looking for capturing
1018  subpatterns, and counting them. If it finds a named pattern that matches the  subpatterns, and counting them. If it finds a named pattern that matches the
1019  name it is given, it returns its number. Alternatively, if the name is NULL, it  name it is given, it returns its number. Alternatively, if the name is NULL, it
1020  returns when it reaches a given numbered subpattern. This is used for forward  returns when it reaches a given numbered subpattern. We know that if (?P< is
1021  references to subpatterns. We know that if (?P< is encountered, the name will  encountered, the name will be terminated by '>' because that is checked in the
1022  be terminated by '>' because that is checked in the first pass.  first pass. Recursion is used to keep track of subpatterns that reset the
1023    capturing group numbers - the (?| feature.
1024    
1025  Arguments:  Arguments:
1026    ptr          current position in the pattern    ptrptr       address of the current character pointer (updated)
1027    cd           compile background data    cd           compile background data
1028    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1029    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1030    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1031      count        pointer to the current capturing subpattern number (updated)
1032    
1033  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
1034  */  */
1035    
1036  static int  static int
1037  find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
1038    BOOL xmode)    BOOL xmode, int *count)
1039  {  {
1040  const uschar *thisname;  uschar *ptr = *ptrptr;
1041  int count = cd->bracount;  int start_count = *count;
1042    int hwm_count = start_count;
1043    BOOL dup_parens = FALSE;
1044    
1045  for (; *ptr != 0; ptr++)  /* If the first character is a parenthesis, check on the type of group we are
1046    dealing with. The very first call may not start with a parenthesis. */
1047    
1048    if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1049    {    {
1050    int term;    if (ptr[1] == CHAR_QUESTION_MARK &&
1051          ptr[2] == CHAR_VERTICAL_LINE)
1052        {
1053        ptr += 3;
1054        dup_parens = TRUE;
1055        }
1056    
1057      /* Handle a normal, unnamed capturing parenthesis */
1058    
1059      else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
1060        {
1061        *count += 1;
1062        if (name == NULL && *count == lorn) return *count;
1063        ptr++;
1064        }
1065    
1066      /* Handle a condition. If it is an assertion, just carry on so that it
1067      is processed as normal. If not, skip to the closing parenthesis of the
1068      condition (there can't be any nested parens). */
1069    
1070      else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
1071        {
1072        ptr += 2;
1073        if (ptr[1] != CHAR_QUESTION_MARK)
1074          {
1075          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1076          if (*ptr != 0) ptr++;
1077          }
1078        }
1079    
1080      /* We have either (? or (* and not a condition */
1081    
1082      else
1083        {
1084        ptr += 2;
1085        if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */
1086    
1087        /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
1088    
1089        if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1090            ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1091          {
1092          int term;
1093          const uschar *thisname;
1094          *count += 1;
1095          if (name == NULL && *count == lorn) return *count;
1096          term = *ptr++;
1097          if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
1098          thisname = ptr;
1099          while (*ptr != term) ptr++;
1100          if (name != NULL && lorn == ptr - thisname &&
1101              strncmp((const char *)name, (const char *)thisname, lorn) == 0)
1102            return *count;
1103          }
1104        }
1105      }
1106    
1107    /* Past any initial parenthesis handling, scan for parentheses or vertical
1108    bars. */
1109    
1110    for (; *ptr != 0; ptr++)
1111      {
1112    /* Skip over backslashed characters and also entire \Q...\E */    /* Skip over backslashed characters and also entire \Q...\E */
1113    
1114    if (*ptr == CHAR_BACKSLASH)    if (*ptr == CHAR_BACKSLASH)
1115      {      {
1116      if (*(++ptr) == 0) return -1;      if (*(++ptr) == 0) goto FAIL_EXIT;
1117      if (*ptr == CHAR_Q) for (;;)      if (*ptr == CHAR_Q) for (;;)
1118        {        {
1119        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1120        if (*ptr == 0) return -1;        if (*ptr == 0) goto FAIL_EXIT;
1121        if (*(++ptr) == CHAR_E) break;        if (*(++ptr) == CHAR_E) break;
1122        }        }
1123      continue;      continue;
# Line 1057  for (; *ptr != 0; ptr++) Line 1126  for (; *ptr != 0; ptr++)
1126    /* Skip over character classes; this logic must be similar to the way they    /* Skip over character classes; this logic must be similar to the way they
1127    are handled for real. If the first character is '^', skip it. Also, if the    are handled for real. If the first character is '^', skip it. Also, if the
1128    first few characters (either before or after ^) are \Q\E or \E we skip them    first few characters (either before or after ^) are \Q\E or \E we skip them
1129    too. This makes for compatibility with Perl. Note the use of STR macros to    too. This makes for compatibility with Perl. Note the use of STR macros to
1130    encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */    encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */
1131    
1132    if (*ptr == CHAR_LEFT_SQUARE_BRACKET)    if (*ptr == CHAR_LEFT_SQUARE_BRACKET)
# Line 1068  for (; *ptr != 0; ptr++) Line 1137  for (; *ptr != 0; ptr++)
1137        int c = *(++ptr);        int c = *(++ptr);
1138        if (c == CHAR_BACKSLASH)        if (c == CHAR_BACKSLASH)
1139          {          {
1140          if (ptr[1] == CHAR_E)          if (ptr[1] == CHAR_E)
1141            ptr++;            ptr++;
1142          else if (strncmp((const char *)ptr+1,          else if (strncmp((const char *)ptr+1,
1143                   STR_Q STR_BACKSLASH STR_E, 3) == 0)                   STR_Q STR_BACKSLASH STR_E, 3) == 0)
1144            ptr += 3;            ptr += 3;
1145          else          else
1146            break;            break;
1147          }          }
1148        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
# Line 1084  for (; *ptr != 0; ptr++) Line 1153  for (; *ptr != 0; ptr++)
1153      /* If the next character is ']', it is a data character that must be      /* If the next character is ']', it is a data character that must be
1154      skipped, except in JavaScript compatibility mode. */      skipped, except in JavaScript compatibility mode. */
1155    
1156      if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&      if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&
1157          (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)          (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
1158        ptr++;        ptr++;
1159    
# Line 1093  for (; *ptr != 0; ptr++) Line 1162  for (; *ptr != 0; ptr++)
1162        if (*ptr == 0) return -1;        if (*ptr == 0) return -1;
1163        if (*ptr == CHAR_BACKSLASH)        if (*ptr == CHAR_BACKSLASH)
1164          {          {
1165          if (*(++ptr) == 0) return -1;          if (*(++ptr) == 0) goto FAIL_EXIT;
1166          if (*ptr == CHAR_Q) for (;;)          if (*ptr == CHAR_Q) for (;;)
1167            {            {
1168            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1169            if (*ptr == 0) return -1;            if (*ptr == 0) goto FAIL_EXIT;
1170            if (*(++ptr) == CHAR_E) break;            if (*(++ptr) == CHAR_E) break;
1171            }            }
1172          continue;          continue;
# Line 1111  for (; *ptr != 0; ptr++) Line 1180  for (; *ptr != 0; ptr++)
1180    if (xmode && *ptr == CHAR_NUMBER_SIGN)    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1181      {      {
1182      while (*(++ptr) != 0 && *ptr != CHAR_NL) {};      while (*(++ptr) != 0 && *ptr != CHAR_NL) {};
1183      if (*ptr == 0) return -1;      if (*ptr == 0) goto FAIL_EXIT;
1184      continue;      continue;
1185      }      }
1186    
1187    /* An opening parens must now be a real metacharacter */    /* Check for the special metacharacters */
1188    
1189    if (*ptr != CHAR_LEFT_PARENTHESIS) continue;    if (*ptr == CHAR_LEFT_PARENTHESIS)
1190    if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)      {
1191        int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
1192        if (rc > 0) return rc;
1193        if (*ptr == 0) goto FAIL_EXIT;
1194        }
1195    
1196      else if (*ptr == CHAR_RIGHT_PARENTHESIS)
1197      {      {
1198      count++;      if (dup_parens && *count < hwm_count) *count = hwm_count;
1199      if (name == NULL && count == lorn) return count;      *ptrptr = ptr;
1200      continue;      return -1;
1201      }      }
1202    
1203      else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
1204        {
1205        if (*count > hwm_count) hwm_count = *count;
1206        *count = start_count;
1207        }
1208      }
1209    
1210    FAIL_EXIT:
1211    *ptrptr = ptr;
1212    return -1;
1213    }
1214    
   ptr += 2;  
   if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */  
1215    
   /* We have to disambiguate (?<! and (?<= from (?<name> */  
1216    
   if ((*ptr != CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_EXCLAMATION_MARK ||  
       ptr[1] == CHAR_EQUALS_SIGN) && *ptr != CHAR_APOSTROPHE)  
     continue;  
1217    
1218    count++;  /*************************************************
1219    *       Find forward referenced subpattern       *
1220    *************************************************/
1221    
1222    if (name == NULL && count == lorn) return count;  /* This function scans along a pattern's text looking for capturing
1223    term = *ptr++;  subpatterns, and counting them. If it finds a named pattern that matches the
1224    if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;  name it is given, it returns its number. Alternatively, if the name is NULL, it
1225    thisname = ptr;  returns when it reaches a given numbered subpattern. This is used for forward
1226    while (*ptr != term) ptr++;  references to subpatterns. We used to be able to start this scan from the
1227    if (name != NULL && lorn == ptr - thisname &&  current compiling point, using the current count value from cd->bracount, and
1228        strncmp((const char *)name, (const char *)thisname, lorn) == 0)  do it all in a single loop, but the addition of the possibility of duplicate
1229      return count;  subpattern numbers means that we have to scan from the very start, in order to
1230    }  take account of such duplicates, and to use a recursive function to keep track
1231    of the different types of group.
1232    
1233  return -1;  Arguments:
1234      cd           compile background data
1235      name         name to seek, or NULL if seeking a numbered subpattern
1236      lorn         name length, or subpattern number if name is NULL
1237      xmode        TRUE if we are in /x mode
1238    
1239    Returns:       the number of the found subpattern, or -1 if not found
1240    */
1241    
1242    static int
1243    find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)
1244    {
1245    uschar *ptr = (uschar *)cd->start_pattern;
1246    int count = 0;
1247    int rc;
1248    
1249    /* If the pattern does not start with an opening parenthesis, the first call
1250    to find_parens_sub() will scan right to the end (if necessary). However, if it
1251    does start with a parenthesis, find_parens_sub() will return when it hits the
1252    matching closing parens. That is why we have to have a loop. */
1253    
1254    for (;;)
1255      {
1256      rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
1257      if (rc > 0 || *ptr++ == 0) break;
1258      }
1259    
1260    return rc;
1261  }  }
1262    
1263    
1264    
1265    
1266  /*************************************************  /*************************************************
1267  *      Find first significant op code            *  *      Find first significant op code            *
1268  *************************************************/  *************************************************/
# Line 1664  for (code = first_significant_code(code Line 1776  for (code = first_significant_code(code
1776      BOOL empty_branch;      BOOL empty_branch;
1777      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
1778    
1779      /* Scan a closed bracket */      /* If a conditional group has only one branch, there is a second, implied,
1780        empty branch, so just skip over the conditional, because it could be empty.
1781        Otherwise, scan the individual branches of the group. */
1782    
1783      empty_branch = FALSE;      if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
     do  
       {  
       if (!empty_branch && could_be_empty_branch(code, endcode, utf8))  
         empty_branch = TRUE;  
1784        code += GET(code, 1);        code += GET(code, 1);
1785        else
1786          {
1787          empty_branch = FALSE;
1788          do
1789            {
1790            if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
1791              empty_branch = TRUE;
1792            code += GET(code, 1);
1793            }
1794          while (*code == OP_ALT);
1795          if (!empty_branch) return FALSE;   /* All branches are non-empty */
1796        }        }
1797      while (*code == OP_ALT);  
     if (!empty_branch) return FALSE;   /* All branches are non-empty */  
1798      c = *code;      c = *code;
1799      continue;      continue;
1800      }      }
# Line 2173  if ((options & PCRE_EXTENDED) != 0) Line 2293  if ((options & PCRE_EXTENDED) != 0)
2293    
2294  /* If the next thing is itself optional, we have to give up. */  /* If the next thing is itself optional, we have to give up. */
2295    
2296  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2297    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2298      return FALSE;      return FALSE;
2299    
# Line 2639  for (;; ptr++) Line 2759  for (;; ptr++)
2759    /* Fill in length of a previous callout, except when the next thing is    /* Fill in length of a previous callout, except when the next thing is
2760    a quantifier. */    a quantifier. */
2761    
2762    is_quantifier =    is_quantifier =
2763      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
2764      (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));      (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
2765    
# Line 2759  for (;; ptr++) Line 2879  for (;; ptr++)
2879      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
2880      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
2881    
2882      if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||      if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
2883           ptr[1] == CHAR_EQUALS_SIGN) &&           ptr[1] == CHAR_EQUALS_SIGN) &&
2884          check_posix_syntax(ptr, &tempptr))          check_posix_syntax(ptr, &tempptr))
2885        {        {
# Line 2777  for (;; ptr++) Line 2897  for (;; ptr++)
2897        c = *(++ptr);        c = *(++ptr);
2898        if (c == CHAR_BACKSLASH)        if (c == CHAR_BACKSLASH)
2899          {          {
2900          if (ptr[1] == CHAR_E)          if (ptr[1] == CHAR_E)
2901            ptr++;            ptr++;
2902          else if (strncmp((const char *)ptr+1,          else if (strncmp((const char *)ptr+1,
2903                            STR_Q STR_BACKSLASH STR_E, 3) == 0)                            STR_Q STR_BACKSLASH STR_E, 3) == 0)
2904            ptr += 3;            ptr += 3;
2905          else          else
2906            break;            break;
2907          }          }
2908        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
# Line 2795  for (;; ptr++) Line 2915  for (;; ptr++)
2915      that. In JS mode, [] must always fail, so generate OP_FAIL, whereas      that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
2916      [^] must match any character, so generate OP_ALLANY. */      [^] must match any character, so generate OP_ALLANY. */
2917    
2918      if (c == CHAR_RIGHT_SQUARE_BRACKET &&      if (c == CHAR_RIGHT_SQUARE_BRACKET &&
2919          (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)          (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2920        {        {
2921        *code++ = negate_class? OP_ALLANY : OP_FAIL;        *code++ = negate_class? OP_ALLANY : OP_FAIL;
# Line 2877  for (;; ptr++) Line 2997  for (;; ptr++)
2997        5.6 and 5.8 do. */        5.6 and 5.8 do. */
2998    
2999        if (c == CHAR_LEFT_SQUARE_BRACKET &&        if (c == CHAR_LEFT_SQUARE_BRACKET &&
3000            (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||            (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
3001             ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))             ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
3002          {          {
3003          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
# Line 3227  for (;; ptr++) Line 3347  for (;; ptr++)
3347          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
3348            {            {
3349            ptr += 2;            ptr += 2;
3350            if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)            if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
3351              { ptr += 2; continue; }              { ptr += 2; continue; }
3352            inescq = TRUE;            inescq = TRUE;
3353            break;            break;
# Line 4427  we set the flag only if there is a liter Line 4547  we set the flag only if there is a liter
4547            }            }
4548          namelen = ptr - name;          namelen = ptr - name;
4549    
4550          if ((terminator > 0 && *ptr++ != terminator) ||          if ((terminator > 0 && *ptr++ != terminator) ||
4551              *ptr++ != CHAR_RIGHT_PARENTHESIS)              *ptr++ != CHAR_RIGHT_PARENTHESIS)
4552            {            {
4553            ptr--;      /* Error offset */            ptr--;      /* Error offset */
# Line 4481  we set the flag only if there is a liter Line 4601  we set the flag only if there is a liter
4601    
4602          /* Search the pattern for a forward reference */          /* Search the pattern for a forward reference */
4603    
4604          else if ((i = find_parens(ptr, cd, name, namelen,          else if ((i = find_parens(cd, name, namelen,
4605                          (options & PCRE_EXTENDED) != 0)) > 0)                          (options & PCRE_EXTENDED) != 0)) > 0)
4606            {            {
4607            PUT2(code, 2+LINK_SIZE, i);            PUT2(code, 2+LINK_SIZE, i);
# Line 4626  we set the flag only if there is a liter Line 4746  we set the flag only if there is a liter
4746    
4747          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4748          case CHAR_P:              /* Python-style named subpattern handling */          case CHAR_P:              /* Python-style named subpattern handling */
4749          if (*(++ptr) == CHAR_EQUALS_SIGN ||          if (*(++ptr) == CHAR_EQUALS_SIGN ||
4750              *ptr == CHAR_GREATER_THAN_SIGN)  /* Reference or recursion */              *ptr == CHAR_GREATER_THAN_SIGN)  /* Reference or recursion */
4751            {            {
4752            is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;            is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
# Line 4645  we set the flag only if there is a liter Line 4765  we set the flag only if there is a liter
4765          DEFINE_NAME:    /* Come here from (?< handling */          DEFINE_NAME:    /* Come here from (?< handling */
4766          case CHAR_APOSTROPHE:          case CHAR_APOSTROPHE:
4767            {            {
4768            terminator = (*ptr == CHAR_LESS_THAN_SIGN)?            terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
4769              CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;              CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
4770            name = ++ptr;            name = ++ptr;
4771    
# Line 4780  we set the flag only if there is a liter Line 4900  we set the flag only if there is a liter
4900              recno = GET2(slot, 0);              recno = GET2(slot, 0);
4901              }              }
4902            else if ((recno =                /* Forward back reference */            else if ((recno =                /* Forward back reference */
4903                      find_parens(ptr, cd, name, namelen,                      find_parens(cd, name, namelen,
4904                        (options & PCRE_EXTENDED) != 0)) <= 0)                        (options & PCRE_EXTENDED) != 0)) <= 0)
4905              {              {
4906              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
# Line 4890  we set the flag only if there is a liter Line 5010  we set the flag only if there is a liter
5010    
5011              if (called == NULL)              if (called == NULL)
5012                {                {
5013                if (find_parens(ptr, cd, NULL, recno,                if (find_parens(cd, NULL, recno,
5014                      (options & PCRE_EXTENDED) != 0) < 0)                      (options & PCRE_EXTENDED) != 0) < 0)
5015                  {                  {
5016                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
# Line 5240  we set the flag only if there is a liter Line 5360  we set the flag only if there is a liter
5360        {        {
5361        if (-c == ESC_Q)            /* Handle start of quoted string */        if (-c == ESC_Q)            /* Handle start of quoted string */
5362          {          {
5363          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
5364            ptr += 2;               /* avoid empty string */            ptr += 2;               /* avoid empty string */
5365              else inescq = TRUE;              else inescq = TRUE;
5366          continue;          continue;
# Line 5270  we set the flag only if there is a liter Line 5390  we set the flag only if there is a liter
5390          {          {
5391          const uschar *p;          const uschar *p;
5392          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
5393          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5394            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
5395    
5396          /* These two statements stop the compiler from warning about possibly          /* These two statements stop the compiler from warning about possibly
# Line 5321  we set the flag only if there is a liter Line 5441  we set the flag only if there is a liter
5441        /* \k<name> or \k'name' is a back reference by name (Perl syntax).        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
5442        We also support \k{name} (.NET syntax) */        We also support \k{name} (.NET syntax) */
5443    
5444        if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||        if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||
5445            ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))            ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))
5446          {          {
5447          is_recurse = FALSE;          is_recurse = FALSE;
5448          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5449            CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?            CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
5450            CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;            CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
5451          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
5452          }          }
# Line 5879  do { Line 5999  do {
5999     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],
6000       NULL, 0, FALSE);       NULL, 0, FALSE);
6001     register int op = *scode;     register int op = *scode;
6002    
6003     /* If we are at the start of a conditional assertion group, *both* the     /* If we are at the start of a conditional assertion group, *both* the
6004     conditional assertion *and* what follows the condition must satisfy the test     conditional assertion *and* what follows the condition must satisfy the test
6005     for start of line. Other kinds of condition fail. Note that there may be an     for start of line. Other kinds of condition fail. Note that there may be an
# Line 5887  do { Line 6007  do {
6007    
6008     if (op == OP_COND)     if (op == OP_COND)
6009       {       {
6010       scode += 1 + LINK_SIZE;       scode += 1 + LINK_SIZE;
6011       if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];       if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
6012       switch (*scode)       switch (*scode)
6013         {         {
6014         case OP_CREF:         case OP_CREF:
6015         case OP_RREF:         case OP_RREF:
6016         case OP_DEF:         case OP_DEF:
6017         return FALSE;         return FALSE;
6018    
6019         default:     /* Assertion */         default:     /* Assertion */
6020         if (!is_startline(scode, bracket_map, backref_map)) return FALSE;         if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6021         do scode += GET(scode, 1); while (*scode == OP_ALT);         do scode += GET(scode, 1); while (*scode == OP_ALT);
6022         scode += 1 + LINK_SIZE;         scode += 1 + LINK_SIZE;
6023         break;         break;
6024         }         }
6025       scode = first_significant_code(scode, NULL, 0, FALSE);       scode = first_significant_code(scode, NULL, 0, FALSE);
6026       op = *scode;       op = *scode;
6027       }       }
6028    
6029     /* Non-capturing brackets */     /* Non-capturing brackets */
6030    
# Line 5925  do { Line 6045  do {
6045     /* Other brackets */     /* Other brackets */
6046    
6047     else if (op == OP_ASSERT || op == OP_ONCE)     else if (op == OP_ASSERT || op == OP_ONCE)
6048       {       {
6049       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6050       }       }
6051    
6052     /* .* means "start at start or after \n" if it isn't in brackets that     /* .* means "start at start or after \n" if it isn't in brackets that
# Line 6141  cd->ctypes = tables + ctypes_offset; Line 6261  cd->ctypes = tables + ctypes_offset;
6261  /* Check for global one-time settings at the start of the pattern, and remember  /* Check for global one-time settings at the start of the pattern, and remember
6262  the offset for later. */  the offset for later. */
6263    
6264  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
6265         ptr[skipatstart+1] == CHAR_ASTERISK)         ptr[skipatstart+1] == CHAR_ASTERISK)
6266    {    {
6267    int newnl = 0;    int newnl = 0;

Legend:
Removed from v.391  
changed lines
  Added in v.408

  ViewVC Help
Powered by ViewVC 1.1.5