/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 406 by ph10, Mon Mar 23 12:05:43 2009 UTC revision 487 by ph10, Wed Jan 6 10:26:55 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 53  supporting internal functions that are n Line 53  supporting internal functions that are n
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    
56  /* When DEBUG is defined, we need the pcre_printint() function, which is also  /* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
57  used by pcretest. DEBUG is not defined when building a production library. */  also used by pcretest. PCRE_DEBUG is not defined when building a production
58    library. */
59    
60  #ifdef DEBUG  #ifdef PCRE_DEBUG
61  #include "pcre_printint.src"  #include "pcre_printint.src"
62  #endif  #endif
63    
# Line 341  static const char error_texts[] = Line 342  static const char error_texts[] =
342    "number is too big\0"    "number is too big\0"
343    "subpattern name expected\0"    "subpattern name expected\0"
344    "digit expected after (?+\0"    "digit expected after (?+\0"
345    "] is an invalid data character in JavaScript compatibility mode";    "] is an invalid data character in JavaScript compatibility mode\0"
346      /* 65 */
347      "different names for subpatterns of the same number are not allowed";
348    
349    
350  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 1009  return p; Line 1012  return p;
1012    
1013    
1014  /*************************************************  /*************************************************
1015  *       Find forward referenced subpattern       *  *  Subroutine for finding forward reference      *
1016  *************************************************/  *************************************************/
1017    
1018  /* This function scans along a pattern's text looking for capturing  /* This recursive function is called only from find_parens() below. The
1019    top-level call starts at the beginning of the pattern. All other calls must
1020    start at a parenthesis. It scans along a pattern's text looking for capturing
1021  subpatterns, and counting them. If it finds a named pattern that matches the  subpatterns, and counting them. If it finds a named pattern that matches the
1022  name it is given, it returns its number. Alternatively, if the name is NULL, it  name it is given, it returns its number. Alternatively, if the name is NULL, it
1023  returns when it reaches a given numbered subpattern. This is used for forward  returns when it reaches a given numbered subpattern. We know that if (?P< is
1024  references to subpatterns. We know that if (?P< is encountered, the name will  encountered, the name will be terminated by '>' because that is checked in the
1025  be terminated by '>' because that is checked in the first pass.  first pass. Recursion is used to keep track of subpatterns that reset the
1026    capturing group numbers - the (?| feature.
1027    
1028  Arguments:  Arguments:
1029    ptr          current position in the pattern    ptrptr       address of the current character pointer (updated)
1030    cd           compile background data    cd           compile background data
1031    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1032    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1033    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1034      count        pointer to the current capturing subpattern number (updated)
1035    
1036  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
1037  */  */
1038    
1039  static int  static int
1040  find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
1041    BOOL xmode)    BOOL xmode, int *count)
1042  {  {
1043  const uschar *thisname;  uschar *ptr = *ptrptr;
1044  int count = cd->bracount;  int start_count = *count;
1045    int hwm_count = start_count;
1046    BOOL dup_parens = FALSE;
1047    
1048  for (; *ptr != 0; ptr++)  /* If the first character is a parenthesis, check on the type of group we are
1049    dealing with. The very first call may not start with a parenthesis. */
1050    
1051    if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1052    {    {
1053    int term;    if (ptr[1] == CHAR_QUESTION_MARK &&
1054          ptr[2] == CHAR_VERTICAL_LINE)
1055        {
1056        ptr += 3;
1057        dup_parens = TRUE;
1058        }
1059    
1060      /* Handle a normal, unnamed capturing parenthesis */
1061    
1062      else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
1063        {
1064        *count += 1;
1065        if (name == NULL && *count == lorn) return *count;
1066        ptr++;
1067        }
1068    
1069      /* Handle a condition. If it is an assertion, just carry on so that it
1070      is processed as normal. If not, skip to the closing parenthesis of the
1071      condition (there can't be any nested parens). */
1072    
1073      else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
1074        {
1075        ptr += 2;
1076        if (ptr[1] != CHAR_QUESTION_MARK)
1077          {
1078          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1079          if (*ptr != 0) ptr++;
1080          }
1081        }
1082    
1083      /* We have either (? or (* and not a condition */
1084    
1085      else
1086        {
1087        ptr += 2;
1088        if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */
1089    
1090        /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
1091    
1092        if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1093            ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1094          {
1095          int term;
1096          const uschar *thisname;
1097          *count += 1;
1098          if (name == NULL && *count == lorn) return *count;
1099          term = *ptr++;
1100          if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
1101          thisname = ptr;
1102          while (*ptr != term) ptr++;
1103          if (name != NULL && lorn == ptr - thisname &&
1104              strncmp((const char *)name, (const char *)thisname, lorn) == 0)
1105            return *count;
1106          term++;
1107          }
1108        }
1109      }
1110    
1111    /* Past any initial parenthesis handling, scan for parentheses or vertical
1112    bars. */
1113    
1114    for (; *ptr != 0; ptr++)
1115      {
1116    /* Skip over backslashed characters and also entire \Q...\E */    /* Skip over backslashed characters and also entire \Q...\E */
1117    
1118    if (*ptr == CHAR_BACKSLASH)    if (*ptr == CHAR_BACKSLASH)
1119      {      {
1120      if (*(++ptr) == 0) return -1;      if (*(++ptr) == 0) goto FAIL_EXIT;
1121      if (*ptr == CHAR_Q) for (;;)      if (*ptr == CHAR_Q) for (;;)
1122        {        {
1123        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1124        if (*ptr == 0) return -1;        if (*ptr == 0) goto FAIL_EXIT;
1125        if (*(++ptr) == CHAR_E) break;        if (*(++ptr) == CHAR_E) break;
1126        }        }
1127      continue;      continue;
# Line 1065  for (; *ptr != 0; ptr++) Line 1138  for (; *ptr != 0; ptr++)
1138      BOOL negate_class = FALSE;      BOOL negate_class = FALSE;
1139      for (;;)      for (;;)
1140        {        {
1141        int c = *(++ptr);        if (ptr[1] == CHAR_BACKSLASH)
       if (c == CHAR_BACKSLASH)  
1142          {          {
1143          if (ptr[1] == CHAR_E)          if (ptr[2] == CHAR_E)
1144            ptr++;            ptr+= 2;
1145          else if (strncmp((const char *)ptr+1,          else if (strncmp((const char *)ptr+2,
1146                   STR_Q STR_BACKSLASH STR_E, 3) == 0)                   STR_Q STR_BACKSLASH STR_E, 3) == 0)
1147            ptr += 3;            ptr += 4;
1148          else          else
1149            break;            break;
1150          }          }
1151        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)        else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
1152            {
1153          negate_class = TRUE;          negate_class = TRUE;
1154            ptr++;
1155            }
1156        else break;        else break;
1157        }        }
1158    
# Line 1093  for (; *ptr != 0; ptr++) Line 1168  for (; *ptr != 0; ptr++)
1168        if (*ptr == 0) return -1;        if (*ptr == 0) return -1;
1169        if (*ptr == CHAR_BACKSLASH)        if (*ptr == CHAR_BACKSLASH)
1170          {          {
1171          if (*(++ptr) == 0) return -1;          if (*(++ptr) == 0) goto FAIL_EXIT;
1172          if (*ptr == CHAR_Q) for (;;)          if (*ptr == CHAR_Q) for (;;)
1173            {            {
1174            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1175            if (*ptr == 0) return -1;            if (*ptr == 0) goto FAIL_EXIT;
1176            if (*(++ptr) == CHAR_E) break;            if (*(++ptr) == CHAR_E) break;
1177            }            }
1178          continue;          continue;
# Line 1111  for (; *ptr != 0; ptr++) Line 1186  for (; *ptr != 0; ptr++)
1186    if (xmode && *ptr == CHAR_NUMBER_SIGN)    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1187      {      {
1188      while (*(++ptr) != 0 && *ptr != CHAR_NL) {};      while (*(++ptr) != 0 && *ptr != CHAR_NL) {};
1189      if (*ptr == 0) return -1;      if (*ptr == 0) goto FAIL_EXIT;
1190      continue;      continue;
1191      }      }
1192    
1193    /* An opening parens must now be a real metacharacter */    /* Check for the special metacharacters */
1194    
1195    if (*ptr != CHAR_LEFT_PARENTHESIS) continue;    if (*ptr == CHAR_LEFT_PARENTHESIS)
   if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)  
1196      {      {
1197      count++;      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
1198      if (name == NULL && count == lorn) return count;      if (rc > 0) return rc;
1199      continue;      if (*ptr == 0) goto FAIL_EXIT;
1200      }      }
1201    
1202    ptr += 2;    else if (*ptr == CHAR_RIGHT_PARENTHESIS)
1203    if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */      {
1204        if (dup_parens && *count < hwm_count) *count = hwm_count;
1205        *ptrptr = ptr;
1206        return -1;
1207        }
1208    
1209      else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
1210        {
1211        if (*count > hwm_count) hwm_count = *count;
1212        *count = start_count;
1213        }
1214      }
1215    
1216    FAIL_EXIT:
1217    *ptrptr = ptr;
1218    return -1;
1219    }
1220    
   /* We have to disambiguate (?<! and (?<= from (?<name> */  
1221    
   if ((*ptr != CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_EXCLAMATION_MARK ||  
       ptr[1] == CHAR_EQUALS_SIGN) && *ptr != CHAR_APOSTROPHE)  
     continue;  
1222    
   count++;  
1223    
1224    if (name == NULL && count == lorn) return count;  /*************************************************
1225    term = *ptr++;  *       Find forward referenced subpattern       *
1226    if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;  *************************************************/
1227    thisname = ptr;  
1228    while (*ptr != term) ptr++;  /* This function scans along a pattern's text looking for capturing
1229    if (name != NULL && lorn == ptr - thisname &&  subpatterns, and counting them. If it finds a named pattern that matches the
1230        strncmp((const char *)name, (const char *)thisname, lorn) == 0)  name it is given, it returns its number. Alternatively, if the name is NULL, it
1231      return count;  returns when it reaches a given numbered subpattern. This is used for forward
1232    references to subpatterns. We used to be able to start this scan from the
1233    current compiling point, using the current count value from cd->bracount, and
1234    do it all in a single loop, but the addition of the possibility of duplicate
1235    subpattern numbers means that we have to scan from the very start, in order to
1236    take account of such duplicates, and to use a recursive function to keep track
1237    of the different types of group.
1238    
1239    Arguments:
1240      cd           compile background data
1241      name         name to seek, or NULL if seeking a numbered subpattern
1242      lorn         name length, or subpattern number if name is NULL
1243      xmode        TRUE if we are in /x mode
1244    
1245    Returns:       the number of the found subpattern, or -1 if not found
1246    */
1247    
1248    static int
1249    find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)
1250    {
1251    uschar *ptr = (uschar *)cd->start_pattern;
1252    int count = 0;
1253    int rc;
1254    
1255    /* If the pattern does not start with an opening parenthesis, the first call
1256    to find_parens_sub() will scan right to the end (if necessary). However, if it
1257    does start with a parenthesis, find_parens_sub() will return when it hits the
1258    matching closing parens. That is why we have to have a loop. */
1259    
1260    for (;;)
1261      {
1262      rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
1263      if (rc > 0 || *ptr++ == 0) break;
1264    }    }
1265    
1266  return -1;  return rc;
1267  }  }
1268    
1269    
1270    
1271    
1272  /*************************************************  /*************************************************
1273  *      Find first significant op code            *  *      Find first significant op code            *
1274  *************************************************/  *************************************************/
# Line 1200  for (;;) Line 1318  for (;;)
1318    
1319      case OP_CALLOUT:      case OP_CALLOUT:
1320      case OP_CREF:      case OP_CREF:
1321        case OP_NCREF:
1322      case OP_RREF:      case OP_RREF:
1323        case OP_NRREF:
1324      case OP_DEF:      case OP_DEF:
1325      code += _pcre_OP_lengths[*code];      code += _pcre_OP_lengths[*code];
1326      break;      break;
# Line 1216  for (;;) Line 1336  for (;;)
1336    
1337    
1338  /*************************************************  /*************************************************
1339  *        Find the fixed length of a pattern      *  *        Find the fixed length of a branch       *
1340  *************************************************/  *************************************************/
1341    
1342  /* Scan a pattern and compute the fixed length of subject that will match it,  /* Scan a branch and compute the fixed length of subject that will match it,
1343  if the length is fixed. This is needed for dealing with backward assertions.  if the length is fixed. This is needed for dealing with backward assertions.
1344  In UTF8 mode, the result is in characters rather than bytes.  In UTF8 mode, the result is in characters rather than bytes. The branch is
1345    temporarily terminated with OP_END when this function is called.
1346    
1347    This function is called when a backward assertion is encountered, so that if it
1348    fails, the error message can point to the correct place in the pattern.
1349    However, we cannot do this when the assertion contains subroutine calls,
1350    because they can be forward references. We solve this by remembering this case
1351    and doing the check at the end; a flag specifies which mode we are running in.
1352    
1353  Arguments:  Arguments:
1354    code     points to the start of the pattern (the bracket)    code     points to the start of the pattern (the bracket)
1355    options  the compiling options    options  the compiling options
1356      atend    TRUE if called when the pattern is complete
1357      cd       the "compile data" structure
1358    
1359  Returns:   the fixed length, or -1 if there is no fixed length,  Returns:   the fixed length,
1360                 or -1 if there is no fixed length,
1361               or -2 if \C was encountered               or -2 if \C was encountered
1362                 or -3 if an OP_RECURSE item was encountered and atend is FALSE
1363  */  */
1364    
1365  static int  static int
1366  find_fixedlength(uschar *code, int options)  find_fixedlength(uschar *code, int options, BOOL atend, compile_data *cd)
1367  {  {
1368  int length = -1;  int length = -1;
1369    
# Line 1245  branch, check the length against that of Line 1376  branch, check the length against that of
1376  for (;;)  for (;;)
1377    {    {
1378    int d;    int d;
1379      uschar *ce, *cs;
1380    register int op = *cc;    register int op = *cc;
1381    switch (op)    switch (op)
1382      {      {
# Line 1252  for (;;) Line 1384  for (;;)
1384      case OP_BRA:      case OP_BRA:
1385      case OP_ONCE:      case OP_ONCE:
1386      case OP_COND:      case OP_COND:
1387      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options);      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options, atend, cd);
1388      if (d < 0) return d;      if (d < 0) return d;
1389      branchlength += d;      branchlength += d;
1390      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 1275  for (;;) Line 1407  for (;;)
1407      branchlength = 0;      branchlength = 0;
1408      break;      break;
1409    
1410        /* A true recursion implies not fixed length, but a subroutine call may
1411        be OK. If the subroutine is a forward reference, we can't deal with
1412        it until the end of the pattern, so return -3. */
1413    
1414        case OP_RECURSE:
1415        if (!atend) return -3;
1416        cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1417        do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */
1418        if (cc > cs && cc < ce) return -1;                /* Recursion */
1419        d = find_fixedlength(cs + 2, options, atend, cd);
1420        if (d < 0) return d;
1421        branchlength += d;
1422        cc += 1 + LINK_SIZE;
1423        break;
1424    
1425      /* Skip over assertive subpatterns */      /* Skip over assertive subpatterns */
1426    
1427      case OP_ASSERT:      case OP_ASSERT:
# Line 1288  for (;;) Line 1435  for (;;)
1435    
1436      case OP_REVERSE:      case OP_REVERSE:
1437      case OP_CREF:      case OP_CREF:
1438        case OP_NCREF:
1439      case OP_RREF:      case OP_RREF:
1440        case OP_NRREF:
1441      case OP_DEF:      case OP_DEF:
1442      case OP_OPT:      case OP_OPT:
1443      case OP_CALLOUT:      case OP_CALLOUT:
# Line 1311  for (;;) Line 1460  for (;;)
1460      branchlength++;      branchlength++;
1461      cc += 2;      cc += 2;
1462  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1463      if ((options & PCRE_UTF8) != 0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1464        {        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
       while ((*cc & 0xc0) == 0x80) cc++;  
       }  
1465  #endif  #endif
1466      break;      break;
1467    
# Line 1325  for (;;) Line 1472  for (;;)
1472      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1473      cc += 4;      cc += 4;
1474  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1475      if ((options & PCRE_UTF8) != 0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1476        {        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
       while((*cc & 0x80) == 0x80) cc++;  
       }  
1477  #endif  #endif
1478      break;      break;
1479    
# Line 1407  for (;;) Line 1552  for (;;)
1552    
1553    
1554  /*************************************************  /*************************************************
1555  *    Scan compiled regex for numbered bracket    *  *    Scan compiled regex for specific bracket    *
1556  *************************************************/  *************************************************/
1557    
1558  /* This little function scans through a compiled pattern until it finds a  /* This little function scans through a compiled pattern until it finds a
1559  capturing bracket with the given number.  capturing bracket with the given number, or, if the number is negative, an
1560    instance of OP_REVERSE for a lookbehind. The function is global in the C sense
1561    so that it can be called from pcre_study() when finding the minimum matching
1562    length.
1563    
1564  Arguments:  Arguments:
1565    code        points to start of expression    code        points to start of expression
1566    utf8        TRUE in UTF-8 mode    utf8        TRUE in UTF-8 mode
1567    number      the required bracket number    number      the required bracket number or negative to find a lookbehind
1568    
1569  Returns:      pointer to the opcode for the bracket, or NULL if not found  Returns:      pointer to the opcode for the bracket, or NULL if not found
1570  */  */
1571    
1572  static const uschar *  const uschar *
1573  find_bracket(const uschar *code, BOOL utf8, int number)  _pcre_find_bracket(const uschar *code, BOOL utf8, int number)
1574  {  {
1575  for (;;)  for (;;)
1576    {    {
# Line 1435  for (;;) Line 1583  for (;;)
1583    
1584    if (c == OP_XCLASS) code += GET(code, 1);    if (c == OP_XCLASS) code += GET(code, 1);
1585    
1586      /* Handle lookbehind (OP_REVERSE): returned when number is negative */
1587    
1588      else if (c == OP_REVERSE)
1589        {
1590        if (number < 0) return (uschar *)code;
1591        code += _pcre_OP_lengths[c];
1592        }
1593    
1594    /* Handle capturing bracket */    /* Handle capturing bracket */
1595    
1596    else if (c == OP_CBRA)    else if (c == OP_CBRA)
# Line 1800  for (code = first_significant_code(code Line 1956  for (code = first_significant_code(code
1956      case OP_QUERY:      case OP_QUERY:
1957      case OP_MINQUERY:      case OP_MINQUERY:
1958      case OP_POSQUERY:      case OP_POSQUERY:
1959        if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
1960        break;
1961    
1962      case OP_UPTO:      case OP_UPTO:
1963      case OP_MINUPTO:      case OP_MINUPTO:
1964      case OP_POSUPTO:      case OP_POSUPTO:
1965      if (utf8) while ((code[2] & 0xc0) == 0x80) code++;      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
1966      break;      break;
1967  #endif  #endif
1968      }      }
# Line 1836  static BOOL Line 1995  static BOOL
1995  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
1996    BOOL utf8)    BOOL utf8)
1997  {  {
1998  while (bcptr != NULL && bcptr->current >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
1999    {    {
2000    if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
2001        return FALSE;
2002    bcptr = bcptr->outer;    bcptr = bcptr->outer;
2003    }    }
2004  return TRUE;  return TRUE;
# Line 2500  BOOL utf8 = FALSE; Line 2660  BOOL utf8 = FALSE;
2660  uschar *utf8_char = NULL;  uschar *utf8_char = NULL;
2661  #endif  #endif
2662    
2663  #ifdef DEBUG  #ifdef PCRE_DEBUG
2664  if (lengthptr != NULL) DPRINTF((">> start branch\n"));  if (lengthptr != NULL) DPRINTF((">> start branch\n"));
2665  #endif  #endif
2666    
# Line 2559  for (;; ptr++) Line 2719  for (;; ptr++)
2719    
2720    if (lengthptr != NULL)    if (lengthptr != NULL)
2721      {      {
2722  #ifdef DEBUG  #ifdef PCRE_DEBUG
2723      if (code > cd->hwm) cd->hwm = code;                 /* High water info */      if (code > cd->hwm) cd->hwm = code;                 /* High water info */
2724  #endif  #endif
2725      if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */      if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */
# Line 3757  we set the flag only if there is a liter Line 3917  we set the flag only if there is a liter
3917    
3918        if (repeat_max == 0) goto END_REPEAT;        if (repeat_max == 0) goto END_REPEAT;
3919    
3920          /*--------------------------------------------------------------------*/
3921          /* This code is obsolete from release 8.00; the restriction was finally
3922          removed: */
3923    
3924        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
3925        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
3926    
3927        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
3928          /*--------------------------------------------------------------------*/
3929    
3930        /* Combine the op_type with the repeat_type */        /* Combine the op_type with the repeat_type */
3931    
# Line 3907  we set the flag only if there is a liter Line 4072  we set the flag only if there is a liter
4072          goto END_REPEAT;          goto END_REPEAT;
4073          }          }
4074    
4075          /*--------------------------------------------------------------------*/
4076          /* This code is obsolete from release 8.00; the restriction was finally
4077          removed: */
4078    
4079        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
4080        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
4081    
4082        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
4083          /*--------------------------------------------------------------------*/
4084    
4085        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
4086          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 4045  we set the flag only if there is a liter Line 4215  we set the flag only if there is a liter
4215            {            {
4216            /* In the pre-compile phase, we don't actually do the replication. We            /* In the pre-compile phase, we don't actually do the replication. We
4217            just adjust the length as if we had. Do some paranoid checks for            just adjust the length as if we had. Do some paranoid checks for
4218            potential integer overflow. */            potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
4219              integer type when available, otherwise double. */
4220    
4221            if (lengthptr != NULL)            if (lengthptr != NULL)
4222              {              {
4223              int delta = (repeat_min - 1)*length_prevgroup;              int delta = (repeat_min - 1)*length_prevgroup;
4224              if ((double)(repeat_min - 1)*(double)length_prevgroup >              if ((INT64_OR_DOUBLE)(repeat_min - 1)*
4225                                                              (double)INT_MAX ||                    (INT64_OR_DOUBLE)length_prevgroup >
4226                        (INT64_OR_DOUBLE)INT_MAX ||
4227                  OFLOW_MAX - *lengthptr < delta)                  OFLOW_MAX - *lengthptr < delta)
4228                {                {
4229                *errorcodeptr = ERR20;                *errorcodeptr = ERR20;
# Line 4097  we set the flag only if there is a liter Line 4269  we set the flag only if there is a liter
4269          just adjust the length as if we had. For each repetition we must add 1          just adjust the length as if we had. For each repetition we must add 1
4270          to the length for BRAZERO and for all but the last repetition we must          to the length for BRAZERO and for all but the last repetition we must
4271          add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some          add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
4272          paranoid checks to avoid integer overflow. */          paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
4273            a 64-bit integer type when available, otherwise double. */
4274    
4275          if (lengthptr != NULL && repeat_max > 0)          if (lengthptr != NULL && repeat_max > 0)
4276            {            {
4277            int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -            int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
4278                        2 - 2*LINK_SIZE;   /* Last one doesn't nest */                        2 - 2*LINK_SIZE;   /* Last one doesn't nest */
4279            if ((double)repeat_max *            if ((INT64_OR_DOUBLE)repeat_max *
4280                  (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)                  (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
4281                    > (double)INT_MAX ||                    > (INT64_OR_DOUBLE)INT_MAX ||
4282                OFLOW_MAX - *lengthptr < delta)                OFLOW_MAX - *lengthptr < delta)
4283              {              {
4284              *errorcodeptr = ERR20;              *errorcodeptr = ERR20;
# Line 4225  we set the flag only if there is a liter Line 4398  we set the flag only if there is a liter
4398      if (possessive_quantifier)      if (possessive_quantifier)
4399        {        {
4400        int len;        int len;
4401        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||  
4402            *tempcode == OP_NOTEXACT)        if (*tempcode == OP_TYPEEXACT)
4403          tempcode += _pcre_OP_lengths[*tempcode] +          tempcode += _pcre_OP_lengths[*tempcode] +
4404            ((*tempcode == OP_TYPEEXACT &&            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
4405               (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);  
4406          else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
4407            {
4408            tempcode += _pcre_OP_lengths[*tempcode];
4409    #ifdef SUPPORT_UTF8
4410            if (utf8 && tempcode[-1] >= 0xc0)
4411              tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
4412    #endif
4413            }
4414    
4415        len = code - tempcode;        len = code - tempcode;
4416        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4417          {          {
# Line 4307  we set the flag only if there is a liter Line 4489  we set the flag only if there is a liter
4489          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
4490              strncmp((char *)name, vn, namelen) == 0)              strncmp((char *)name, vn, namelen) == 0)
4491            {            {
4492            *code = verbs[i].op;            /* Check for open captures before ACCEPT */
4493            if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;  
4494              if (verbs[i].op == OP_ACCEPT)
4495                {
4496                open_capitem *oc;
4497                cd->had_accept = TRUE;
4498                for (oc = cd->open_caps; oc != NULL; oc = oc->next)
4499                  {
4500                  *code++ = OP_CLOSE;
4501                  PUT2INC(code, 0, oc->number);
4502                  }
4503                }
4504              *code++ = verbs[i].op;
4505            break;            break;
4506            }            }
4507          vn += verbs[i].len + 1;          vn += verbs[i].len + 1;
# Line 4470  we set the flag only if there is a liter Line 4663  we set the flag only if there is a liter
4663            }            }
4664    
4665          /* Otherwise (did not start with "+" or "-"), start by looking for the          /* Otherwise (did not start with "+" or "-"), start by looking for the
4666          name. */          name. If we find a name, add one to the opcode to change OP_CREF or
4667            OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same,
4668            except they record that the reference was originally to a name. The
4669            information is used to check duplicate names. */
4670    
4671          slot = cd->name_table;          slot = cd->name_table;
4672          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
# Line 4485  we set the flag only if there is a liter Line 4681  we set the flag only if there is a liter
4681            {            {
4682            recno = GET2(slot, 0);            recno = GET2(slot, 0);
4683            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
4684              code[1+LINK_SIZE]++;
4685            }            }
4686    
4687          /* Search the pattern for a forward reference */          /* Search the pattern for a forward reference */
4688    
4689          else if ((i = find_parens(ptr, cd, name, namelen,          else if ((i = find_parens(cd, name, namelen,
4690                          (options & PCRE_EXTENDED) != 0)) > 0)                          (options & PCRE_EXTENDED) != 0)) > 0)
4691            {            {
4692            PUT2(code, 2+LINK_SIZE, i);            PUT2(code, 2+LINK_SIZE, i);
4693              code[1+LINK_SIZE]++;
4694            }            }
4695    
4696          /* If terminator == 0 it means that the name followed directly after          /* If terminator == 0 it means that the name followed directly after
# Line 4685  we set the flag only if there is a liter Line 4883  we set the flag only if there is a liter
4883                }                }
4884              }              }
4885    
4886            /* In the real compile, create the entry in the table */            /* In the real compile, create the entry in the table, maintaining
4887              alphabetical order. Duplicate names for different numbers are
4888              permitted only if PCRE_DUPNAMES is set. Duplicate names for the same
4889              number are always OK. (An existing number can be re-used if (?|
4890              appears in the pattern.) In either event, a duplicate name results in
4891              a duplicate entry in the table, even if the number is the same. This
4892              is because the number of names, and hence the table size, is computed
4893              in the pre-compile, and it affects various numbers and pointers which
4894              would all have to be modified, and the compiled code moved down, if
4895              duplicates with the same number were omitted from the table. This
4896              doesn't seem worth the hassle. However, *different* names for the
4897              same number are not permitted. */
4898    
4899            else            else
4900              {              {
4901                BOOL dupname = FALSE;
4902              slot = cd->name_table;              slot = cd->name_table;
4903    
4904              for (i = 0; i < cd->names_found; i++)              for (i = 0; i < cd->names_found; i++)
4905                {                {
4906                int crc = memcmp(name, slot+2, namelen);                int crc = memcmp(name, slot+2, namelen);
# Line 4697  we set the flag only if there is a liter Line 4908  we set the flag only if there is a liter
4908                  {                  {
4909                  if (slot[2+namelen] == 0)                  if (slot[2+namelen] == 0)
4910                    {                    {
4911                    if ((options & PCRE_DUPNAMES) == 0)                    if (GET2(slot, 0) != cd->bracount + 1 &&
4912                          (options & PCRE_DUPNAMES) == 0)
4913                      {                      {
4914                      *errorcodeptr = ERR43;                      *errorcodeptr = ERR43;
4915                      goto FAILED;                      goto FAILED;
4916                      }                      }
4917                      else dupname = TRUE;
4918                    }                    }
4919                  else crc = -1;      /* Current name is substring */                  else crc = -1;      /* Current name is a substring */
4920                  }                  }
4921    
4922                  /* Make space in the table and break the loop for an earlier
4923                  name. For a duplicate or later name, carry on. We do this for
4924                  duplicates so that in the simple case (when (?| is not used) they
4925                  are in order of their numbers. */
4926    
4927                if (crc < 0)                if (crc < 0)
4928                  {                  {
4929                  memmove(slot + cd->name_entry_size, slot,                  memmove(slot + cd->name_entry_size, slot,
4930                    (cd->names_found - i) * cd->name_entry_size);                    (cd->names_found - i) * cd->name_entry_size);
4931                  break;                  break;
4932                  }                  }
4933    
4934                  /* Continue the loop for a later or duplicate name */
4935    
4936                slot += cd->name_entry_size;                slot += cd->name_entry_size;
4937                }                }
4938    
4939                /* For non-duplicate names, check for a duplicate number before
4940                adding the new name. */
4941    
4942                if (!dupname)
4943                  {
4944                  uschar *cslot = cd->name_table;
4945                  for (i = 0; i < cd->names_found; i++)
4946                    {
4947                    if (cslot != slot)
4948                      {
4949                      if (GET2(cslot, 0) == cd->bracount + 1)
4950                        {
4951                        *errorcodeptr = ERR65;
4952                        goto FAILED;
4953                        }
4954                      }
4955                    else i--;
4956                    cslot += cd->name_entry_size;
4957                    }
4958                  }
4959    
4960              PUT2(slot, 0, cd->bracount + 1);              PUT2(slot, 0, cd->bracount + 1);
4961              memcpy(slot + 2, name, namelen);              memcpy(slot + 2, name, namelen);
4962              slot[2+namelen] = 0;              slot[2+namelen] = 0;
4963              }              }
4964            }            }
4965    
4966          /* In both cases, count the number of names we've encountered. */          /* In both pre-compile and compile, count the number of names we've
4967            encountered. */
4968    
         ptr++;                    /* Move past > or ' */  
4969          cd->names_found++;          cd->names_found++;
4970            ptr++;                    /* Move past > or ' */
4971          goto NUMBERED_GROUP;          goto NUMBERED_GROUP;
4972    
4973    
# Line 4788  we set the flag only if there is a liter Line 5032  we set the flag only if there is a liter
5032              recno = GET2(slot, 0);              recno = GET2(slot, 0);
5033              }              }
5034            else if ((recno =                /* Forward back reference */            else if ((recno =                /* Forward back reference */
5035                      find_parens(ptr, cd, name, namelen,                      find_parens(cd, name, namelen,
5036                        (options & PCRE_EXTENDED) != 0)) <= 0)                        (options & PCRE_EXTENDED) != 0)) <= 0)
5037              {              {
5038              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
# Line 4892  we set the flag only if there is a liter Line 5136  we set the flag only if there is a liter
5136            if (lengthptr == NULL)            if (lengthptr == NULL)
5137              {              {
5138              *code = OP_END;              *code = OP_END;
5139              if (recno != 0) called = find_bracket(cd->start_code, utf8, recno);              if (recno != 0)
5140                  called = _pcre_find_bracket(cd->start_code, utf8, recno);
5141    
5142              /* Forward reference */              /* Forward reference */
5143    
5144              if (called == NULL)              if (called == NULL)
5145                {                {
5146                if (find_parens(ptr, cd, NULL, recno,                if (find_parens(cd, NULL, recno,
5147                      (options & PCRE_EXTENDED) != 0) < 0)                      (options & PCRE_EXTENDED) != 0) < 0)
5148                  {                  {
5149                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
# Line 5008  we set the flag only if there is a liter Line 5253  we set the flag only if there is a liter
5253              {              {
5254              cd->external_options = newoptions;              cd->external_options = newoptions;
5255              }              }
5256           else            else
5257              {              {
5258              if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))              if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))
5259                {                {
# Line 5536  uschar *code = *codeptr; Line 5781  uschar *code = *codeptr;
5781  uschar *last_branch = code;  uschar *last_branch = code;
5782  uschar *start_bracket = code;  uschar *start_bracket = code;
5783  uschar *reverse_count = NULL;  uschar *reverse_count = NULL;
5784    open_capitem capitem;
5785    int capnumber = 0;
5786  int firstbyte, reqbyte;  int firstbyte, reqbyte;
5787  int branchfirstbyte, branchreqbyte;  int branchfirstbyte, branchreqbyte;
5788  int length;  int length;
5789  int orig_bracount;  int orig_bracount;
5790  int max_bracount;  int max_bracount;
5791    int old_external_options = cd->external_options;
5792  branch_chain bc;  branch_chain bc;
5793    
5794  bc.outer = bcptr;  bc.outer = bcptr;
5795  bc.current = code;  bc.current_branch = code;
5796    
5797  firstbyte = reqbyte = REQ_UNSET;  firstbyte = reqbyte = REQ_UNSET;
5798    
# Line 5562  the code that abstracts option settings Line 5810  the code that abstracts option settings
5810  them global. It tests the value of length for (2 + 2*LINK_SIZE) in the  them global. It tests the value of length for (2 + 2*LINK_SIZE) in the
5811  pre-compile phase to find out whether anything has yet been compiled or not. */  pre-compile phase to find out whether anything has yet been compiled or not. */
5812    
5813    /* If this is a capturing subpattern, add to the chain of open capturing items
5814    so that we can detect them if (*ACCEPT) is encountered. */
5815    
5816    if (*code == OP_CBRA)
5817      {
5818      capnumber = GET2(code, 1 + LINK_SIZE);
5819      capitem.number = capnumber;
5820      capitem.next = cd->open_caps;
5821      cd->open_caps = &capitem;
5822      }
5823    
5824  /* Offset is set zero to mark that this bracket is still open */  /* Offset is set zero to mark that this bracket is still open */
5825    
5826  PUT(code, 1, 0);  PUT(code, 1, 0);
# Line 5606  for (;;) Line 5865  for (;;)
5865      return FALSE;      return FALSE;
5866      }      }
5867    
5868      /* If the external options have changed during this branch, it means that we
5869      are at the top level, and a leading option setting has been encountered. We
5870      need to re-set the original option values to take account of this so that,
5871      during the pre-compile phase, we know to allow for a re-set at the start of
5872      subsequent branches. */
5873    
5874      if (old_external_options != cd->external_options)
5875        oldims = cd->external_options & PCRE_IMS;
5876    
5877    /* Keep the highest bracket count in case (?| was used and some branch    /* Keep the highest bracket count in case (?| was used and some branch
5878    has fewer than the rest. */    has fewer than the rest. */
5879    
# Line 5656  for (;;) Line 5924  for (;;)
5924    
5925      /* If lookbehind, check that this branch matches a fixed-length string, and      /* If lookbehind, check that this branch matches a fixed-length string, and
5926      put the length into the OP_REVERSE item. Temporarily mark the end of the      put the length into the OP_REVERSE item. Temporarily mark the end of the
5927      branch with OP_END. */      branch with OP_END. If the branch contains OP_RECURSE, the result is -3
5928        because there may be forward references that we can't check here. Set a
5929        flag to cause another lookbehind check at the end. Why not do it all at the
5930        end? Because common, erroneous checks are picked up here and the offset of
5931        the problem can be shown. */
5932    
5933      if (lookbehind)      if (lookbehind)
5934        {        {
5935        int fixed_length;        int fixed_length;
5936        *code = OP_END;        *code = OP_END;
5937        fixed_length = find_fixedlength(last_branch, options);        fixed_length = find_fixedlength(last_branch, options, FALSE, cd);
5938        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
5939        if (fixed_length < 0)        if (fixed_length == -3)
5940            {
5941            cd->check_lookbehind = TRUE;
5942            }
5943          else if (fixed_length < 0)
5944          {          {
5945          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;
5946          *ptrptr = ptr;          *ptrptr = ptr;
5947          return FALSE;          return FALSE;
5948          }          }
5949        PUT(reverse_count, 0, fixed_length);        else { PUT(reverse_count, 0, fixed_length); }
5950        }        }
5951      }      }
5952    
# Line 5698  for (;;) Line 5974  for (;;)
5974        while (branch_length > 0);        while (branch_length > 0);
5975        }        }
5976    
5977        /* If it was a capturing subpattern, remove it from the chain. */
5978    
5979        if (capnumber > 0) cd->open_caps = cd->open_caps->next;
5980    
5981      /* Fill in the ket */      /* Fill in the ket */
5982    
5983      *code = OP_KET;      *code = OP_KET;
5984      PUT(code, 1, code - start_bracket);      PUT(code, 1, code - start_bracket);
5985      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
5986    
5987      /* Resetting option if needed */      /* Reset options if needed. */
5988    
5989      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
5990        {        {
# Line 5753  for (;;) Line 6033  for (;;)
6033      {      {
6034      *code = OP_ALT;      *code = OP_ALT;
6035      PUT(code, 1, code - last_branch);      PUT(code, 1, code - last_branch);
6036      bc.current = last_branch = code;      bc.current_branch = last_branch = code;
6037      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
6038      }      }
6039    
# Line 5900  do { Line 6180  do {
6180       switch (*scode)       switch (*scode)
6181         {         {
6182         case OP_CREF:         case OP_CREF:
6183           case OP_NCREF:
6184         case OP_RREF:         case OP_RREF:
6185           case OP_NRREF:
6186         case OP_DEF:         case OP_DEF:
6187         return FALSE;         return FALSE;
6188    
# Line 6069  int length = 1;  /* For final END opcode Line 6351  int length = 1;  /* For final END opcode
6351  int firstbyte, reqbyte, newline;  int firstbyte, reqbyte, newline;
6352  int errorcode = 0;  int errorcode = 0;
6353  int skipatstart = 0;  int skipatstart = 0;
6354  #ifdef SUPPORT_UTF8  BOOL utf8 = (options & PCRE_UTF8) != 0;
 BOOL utf8;  
 #endif  
6355  size_t size;  size_t size;
6356  uschar *code;  uschar *code;
6357  const uschar *codestart;  const uschar *codestart;
# Line 6114  if (erroroffset == NULL) Line 6394  if (erroroffset == NULL)
6394    
6395  *erroroffset = 0;  *erroroffset = 0;
6396    
 /* Can't support UTF8 unless PCRE has been compiled to include the code. */  
   
 #ifdef SUPPORT_UTF8  
 utf8 = (options & PCRE_UTF8) != 0;  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  
      (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)  
   {  
   errorcode = ERR44;  
   goto PCRE_EARLY_ERROR_RETURN2;  
   }  
 #else  
 if ((options & PCRE_UTF8) != 0)  
   {  
   errorcode = ERR32;  
   goto PCRE_EARLY_ERROR_RETURN;  
   }  
 #endif  
   
 if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)  
   {  
   errorcode = ERR17;  
   goto PCRE_EARLY_ERROR_RETURN;  
   }  
   
6397  /* Set up pointers to the individual character tables */  /* Set up pointers to the individual character tables */
6398    
6399  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = _pcre_default_tables;
# Line 6146  cd->fcc = tables + fcc_offset; Line 6402  cd->fcc = tables + fcc_offset;
6402  cd->cbits = tables + cbits_offset;  cd->cbits = tables + cbits_offset;
6403  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
6404    
6405    /* Check that all undefined public option bits are zero */
6406    
6407    if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
6408      {
6409      errorcode = ERR17;
6410      goto PCRE_EARLY_ERROR_RETURN;
6411      }
6412    
6413  /* Check for global one-time settings at the start of the pattern, and remember  /* Check for global one-time settings at the start of the pattern, and remember
6414  the offset for later. */  the offset for later. */
6415    
# Line 6155  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 6419  while (ptr[skipatstart] == CHAR_LEFT_PAR
6419    int newnl = 0;    int newnl = 0;
6420    int newbsr = 0;    int newbsr = 0;
6421    
6422      if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
6423        { skipatstart += 7; options |= PCRE_UTF8; continue; }
6424    
6425    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
6426      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
6427    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)
# Line 6178  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 6445  while (ptr[skipatstart] == CHAR_LEFT_PAR
6445    else break;    else break;
6446    }    }
6447    
6448    /* Can't support UTF8 unless PCRE has been compiled to include the code. */
6449    
6450    #ifdef SUPPORT_UTF8
6451    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
6452         (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1)) >= 0)
6453      {
6454      errorcode = ERR44;
6455      goto PCRE_EARLY_ERROR_RETURN2;
6456      }
6457    #else
6458    if (utf8)
6459      {
6460      errorcode = ERR32;
6461      goto PCRE_EARLY_ERROR_RETURN;
6462      }
6463    #endif
6464    
6465  /* Check validity of \R options. */  /* Check validity of \R options. */
6466    
6467  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
# Line 6260  cd->end_pattern = (const uschar *)(patte Line 6544  cd->end_pattern = (const uschar *)(patte
6544  cd->req_varyopt = 0;  cd->req_varyopt = 0;
6545  cd->external_options = options;  cd->external_options = options;
6546  cd->external_flags = 0;  cd->external_flags = 0;
6547    cd->open_caps = NULL;
6548    
6549  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
6550  don't need to look at the result of the function here. The initial options have  don't need to look at the result of the function here. The initial options have
# Line 6334  cd->start_code = codestart; Line 6619  cd->start_code = codestart;
6619  cd->hwm = cworkspace;  cd->hwm = cworkspace;
6620  cd->req_varyopt = 0;  cd->req_varyopt = 0;
6621  cd->had_accept = FALSE;  cd->had_accept = FALSE;
6622    cd->check_lookbehind = FALSE;
6623    cd->open_caps = NULL;
6624    
6625  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
6626  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
# Line 6359  if debugging, leave the test till after Line 6646  if debugging, leave the test till after
6646    
6647  *code++ = OP_END;  *code++ = OP_END;
6648    
6649  #ifndef DEBUG  #ifndef PCRE_DEBUG
6650  if (code - codestart > length) errorcode = ERR23;  if (code - codestart > length) errorcode = ERR23;
6651  #endif  #endif
6652    
# Line 6372  while (errorcode == 0 && cd->hwm > cwork Line 6659  while (errorcode == 0 && cd->hwm > cwork
6659    cd->hwm -= LINK_SIZE;    cd->hwm -= LINK_SIZE;
6660    offset = GET(cd->hwm, 0);    offset = GET(cd->hwm, 0);
6661    recno = GET(codestart, offset);    recno = GET(codestart, offset);
6662    groupptr = find_bracket(codestart, (re->options & PCRE_UTF8) != 0, recno);    groupptr = _pcre_find_bracket(codestart, utf8, recno);
6663    if (groupptr == NULL) errorcode = ERR53;    if (groupptr == NULL) errorcode = ERR53;
6664      else PUT(((uschar *)codestart), offset, groupptr - codestart);      else PUT(((uschar *)codestart), offset, groupptr - codestart);
6665    }    }
# Line 6382  subpattern. */ Line 6669  subpattern. */
6669    
6670  if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;  if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
6671    
6672    /* If there were any lookbehind assertions that contained OP_RECURSE
6673    (recursions or subroutine calls), a flag is set for them to be checked here,
6674    because they may contain forward references. Actual recursions can't be fixed
6675    length, but subroutine calls can. It is done like this so that those without
6676    OP_RECURSE that are not fixed length get a diagnostic with a useful offset. The
6677    exceptional ones forgo this. We scan the pattern to check that they are fixed
6678    length, and set their lengths. */
6679    
6680    if (cd->check_lookbehind)
6681      {
6682      uschar *cc = (uschar *)codestart;
6683    
6684      /* Loop, searching for OP_REVERSE items, and process those that do not have
6685      their length set. (Actually, it will also re-process any that have a length
6686      of zero, but that is a pathological case, and it does no harm.) When we find
6687      one, we temporarily terminate the branch it is in while we scan it. */
6688    
6689      for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);
6690           cc != NULL;
6691           cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))
6692        {
6693        if (GET(cc, 1) == 0)
6694          {
6695          int fixed_length;
6696          uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
6697          int end_op = *be;
6698          *be = OP_END;
6699          fixed_length = find_fixedlength(cc, re->options, TRUE, cd);
6700          *be = end_op;
6701          DPRINTF(("fixed length = %d\n", fixed_length));
6702          if (fixed_length < 0)
6703            {
6704            errorcode = (fixed_length == -2)? ERR36 : ERR25;
6705            break;
6706            }
6707          PUT(cc, 1, fixed_length);
6708          }
6709        cc += 1 + LINK_SIZE;
6710        }
6711      }
6712    
6713  /* Failed to compile, or error while post-processing */  /* Failed to compile, or error while post-processing */
6714    
6715  if (errorcode != 0)  if (errorcode != 0)
# Line 6442  if (reqbyte >= 0 && Line 6770  if (reqbyte >= 0 &&
6770  /* Print out the compiled data if debugging is enabled. This is never the  /* Print out the compiled data if debugging is enabled. This is never the
6771  case when building a production library. */  case when building a production library. */
6772    
6773  #ifdef DEBUG  #ifdef PCRE_DEBUG
6774    
6775  printf("Length = %d top_bracket = %d top_backref = %d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
6776    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
# Line 6480  if (code - codestart > length) Line 6808  if (code - codestart > length)
6808    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
6809    return NULL;    return NULL;
6810    }    }
6811  #endif   /* DEBUG */  #endif   /* PCRE_DEBUG */
6812    
6813  return (pcre *)re;  return (pcre *)re;
6814  }  }

Legend:
Removed from v.406  
changed lines
  Added in v.487

  ViewVC Help
Powered by ViewVC 1.1.5