/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1071 by chpe, Tue Oct 16 15:54:37 2012 UTC | revision 1348 by ph10, Fri Jul 5 10:38:37 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 80  to check them every time. */ Line 80  to check them every time. */
80  /* Definitions to allow mutual recursion */  /* Definitions to allow mutual recursion */
81    
82  static int  static int
83    add_list_to_class(pcre_uint8 *, pcre_uchar **, int, compile_data *,    add_list_to_class(pcre_uint8 *, pcre_uchar **, int, compile_data *,
84      const pcre_uint32 *, unsigned int);      const pcre_uint32 *, unsigned int);
85    
86  static BOOL  static BOOL
87    compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL,    compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
88      int, int, int *, int *, branch_chain *, compile_data *, int *);      pcre_uint32 *, pcre_int32 *, pcre_uint32 *, pcre_int32 *, branch_chain *,
89        compile_data *, int *);
90    
91    
92    
# Line 121  overrun before it actually does run off Line 122  overrun before it actually does run off
122    
123  /* Private flags added to firstchar and reqchar. */  /* Private flags added to firstchar and reqchar. */
124    
125  #define REQ_CASELESS   0x10000000l      /* Indicates caselessness */  #define REQ_CASELESS    (1 << 0)        /* Indicates caselessness */
126  #define REQ_VARY       0x20000000l      /* Reqchar followed non-literal item */  #define REQ_VARY        (1 << 1)        /* Reqchar followed non-literal item */
127  #define REQ_MASK       (REQ_CASELESS | REQ_VARY)  /* Negative values for the firstchar and reqchar flags */
128    #define REQ_UNSET       (-2)
129    #define REQ_NONE        (-1)
130    
131  /* Repeated character flags. */  /* Repeated character flags. */
132    
# Line 484  static const char error_texts[] = Line 487  static const char error_texts[] =
487    "a numbered reference must not be zero\0"    "a numbered reference must not be zero\0"
488    "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"    "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
489    /* 60 */    /* 60 */
490    "(*VERB) not recognized\0"    "(*VERB) not recognized or malformed\0"
491    "number is too big\0"    "number is too big\0"
492    "subpattern name expected\0"    "subpattern name expected\0"
493    "digit expected after (?+\0"    "digit expected after (?+\0"
# Line 505  static const char error_texts[] = Line 508  static const char error_texts[] =
508    "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"    "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
509    "character value in \\u.... sequence is too large\0"    "character value in \\u.... sequence is too large\0"
510    "invalid UTF-32 string\0"    "invalid UTF-32 string\0"
511      "setting UTF is disabled by the application\0"
512    ;    ;
513    
514  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 645  static const pcre_uint8 ebcdic_chartab[] Line 649  static const pcre_uint8 ebcdic_chartab[]
649    
650    
651    
   
652  /*************************************************  /*************************************************
653  *            Find an error text                  *  *            Find an error text                  *
654  *************************************************/  *************************************************/
# Line 665  find_error_text(int n) Line 668  find_error_text(int n)
668  const char *s = error_texts;  const char *s = error_texts;
669  for (; n > 0; n--)  for (; n > 0; n--)
670    {    {
671    while (*s++ != 0) {};    while (*s++ != CHAR_NULL) {};
672    if (*s == 0) return "Error text not found (please report)";    if (*s == CHAR_NULL) return "Error text not found (please report)";
673    }    }
674  return s;  return s;
675  }  }
# Line 771  Returns:         zero => a data characte Line 774  Returns:         zero => a data characte
774  */  */
775    
776  static int  static int
777  check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr,  check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr,
778    int bracount, int options, BOOL isclass)    int bracount, int options, BOOL isclass)
779  {  {
780  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
# Line 786  ptr--;                            /* Set Line 789  ptr--;                            /* Set
789    
790  /* If backslash is at the end of the pattern, it's an error. */  /* If backslash is at the end of the pattern, it's an error. */
791    
792  if (c == 0) *errorcodeptr = ERR1;  if (c == CHAR_NULL) *errorcodeptr = ERR1;
793    
794  /* Non-alphanumerics are literals. For digits or letters, do an initial lookup  /* Non-alphanumerics are literals. For digits or letters, do an initial lookup
795  in a table. A non-zero result is something that can be returned immediately.  in a table. A non-zero result is something that can be returned immediately.
# Line 795  Otherwise further processing may be requ Line 798  Otherwise further processing may be requ
798  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
799  /* Not alphanumeric */  /* Not alphanumeric */
800  else if (c < CHAR_0 || c > CHAR_z) {}  else if (c < CHAR_0 || c > CHAR_z) {}
801  else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }  else if ((i = escapes[c - CHAR_0]) != 0)
802      { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
803    
804  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
805  /* Not alphanumeric */  /* Not alphanumeric */
# Line 845  else Line 849  else
849            }            }
850    
851  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
852          if (c > (utf ? 0x10ffff : 0xff))          if (c > (utf ? 0x10ffffU : 0xffU))
853  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16
854          if (c > (utf ? 0x10ffff : 0xffff))          if (c > (utf ? 0x10ffffU : 0xffffU))
855  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
856          if (utf && c > 0x10ffff)          if (utf && c > 0x10ffffU)
857  #endif  #endif
858            {            {
859            *errorcodeptr = ERR76;            *errorcodeptr = ERR76;
# Line 896  else Line 900  else
900      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
901        {        {
902        const pcre_uchar *p;        const pcre_uchar *p;
903        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)        for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
904          if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;          if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
905        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)        if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET)
906          {          {
907          escape = ESC_k;          escape = ESC_k;
908          break;          break;
# Line 1083  else Line 1087  else
1087  #endif  #endif
1088    
1089  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
1090          if (c > (utf ? 0x10ffff : 0xff)) { overflow = TRUE; break; }          if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1091  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16
1092          if (c > (utf ? 0x10ffff : 0xffff)) { overflow = TRUE; break; }          if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1093  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
1094          if (utf && c > 0x10ffff) { overflow = TRUE; break; }          if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1095  #endif  #endif
1096          }          }
1097    
# Line 1132  else Line 1136  else
1136    
1137      case CHAR_c:      case CHAR_c:
1138      c = *(++ptr);      c = *(++ptr);
1139      if (c == 0)      if (c == CHAR_NULL)
1140        {        {
1141        *errorcodeptr = ERR2;        *errorcodeptr = ERR2;
1142        break;        break;
# Line 1201  escape sequence. Line 1205  escape sequence.
1205  Argument:  Argument:
1206    ptrptr         points to the pattern position pointer    ptrptr         points to the pattern position pointer
1207    negptr         points to a boolean that is set TRUE for negation else FALSE    negptr         points to a boolean that is set TRUE for negation else FALSE
1208    dptr           points to an int that is set to the detailed property value    ptypeptr       points to an unsigned int that is set to the type value
1209      pdataptr       points to an unsigned int that is set to the detailed property value
1210    errorcodeptr   points to the error code variable    errorcodeptr   points to the error code variable
1211    
1212  Returns:         type value from ucp_type_table, or -1 for an invalid type  Returns:         TRUE if the type value was found, or FALSE for an invalid type
1213  */  */
1214    
1215  static int  static BOOL
1216  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, unsigned int *ptypeptr,
1217      unsigned int *pdataptr, int *errorcodeptr)
1218  {  {
1219  pcre_uchar c;  pcre_uchar c;
1220  int i, bot, top;  int i, bot, top;
# Line 1216  const pcre_uchar *ptr = *ptrptr; Line 1222  const pcre_uchar *ptr = *ptrptr;
1222  pcre_uchar name[32];  pcre_uchar name[32];
1223    
1224  c = *(++ptr);  c = *(++ptr);
1225  if (c == 0) goto ERROR_RETURN;  if (c == CHAR_NULL) goto ERROR_RETURN;
1226    
1227  *negptr = FALSE;  *negptr = FALSE;
1228    
# Line 1233  if (c == CHAR_LEFT_CURLY_BRACKET) Line 1239  if (c == CHAR_LEFT_CURLY_BRACKET)
1239    for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++)    for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++)
1240      {      {
1241      c = *(++ptr);      c = *(++ptr);
1242      if (c == 0) goto ERROR_RETURN;      if (c == CHAR_NULL) goto ERROR_RETURN;
1243      if (c == CHAR_RIGHT_CURLY_BRACKET) break;      if (c == CHAR_RIGHT_CURLY_BRACKET) break;
1244      name[i] = c;      name[i] = c;
1245      }      }
# Line 1263  while (bot < top) Line 1269  while (bot < top)
1269    r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);    r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
1270    if (r == 0)    if (r == 0)
1271      {      {
1272      *dptr = PRIV(utt)[i].value;      *ptypeptr = PRIV(utt)[i].type;
1273      return PRIV(utt)[i].type;      *pdataptr = PRIV(utt)[i].value;
1274        return TRUE;
1275      }      }
1276    if (r > 0) bot = i + 1; else top = i;    if (r > 0) bot = i + 1; else top = i;
1277    }    }
1278    
1279  *errorcodeptr = ERR47;  *errorcodeptr = ERR47;
1280  *ptrptr = ptr;  *ptrptr = ptr;
1281  return -1;  return FALSE;
1282    
1283  ERROR_RETURN:  ERROR_RETURN:
1284  *errorcodeptr = ERR46;  *errorcodeptr = ERR46;
1285  *ptrptr = ptr;  *ptrptr = ptr;
1286  return -1;  return FALSE;
1287  }  }
1288  #endif  #endif
1289    
# Line 1403  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1410  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1410    {    {
1411    /* Handle specials such as (*SKIP) or (*UTF8) etc. */    /* Handle specials such as (*SKIP) or (*UTF8) etc. */
1412    
1413    if (ptr[1] == CHAR_ASTERISK) ptr += 2;    if (ptr[1] == CHAR_ASTERISK)
1414        {
1415        ptr += 2;
1416        while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1417        }
1418    
1419    /* Handle a normal, unnamed capturing parenthesis. */    /* Handle a normal, unnamed capturing parenthesis. */
1420    
# Line 1427  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1438  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1438    
1439    else if (ptr[2] == CHAR_NUMBER_SIGN)    else if (ptr[2] == CHAR_NUMBER_SIGN)
1440      {      {
1441      for (ptr += 3; *ptr != 0; ptr++) if (*ptr == CHAR_RIGHT_PARENTHESIS) break;      for (ptr += 3; *ptr != CHAR_NULL; ptr++)
1442          if (*ptr == CHAR_RIGHT_PARENTHESIS) break;
1443      goto FAIL_EXIT;      goto FAIL_EXIT;
1444      }      }
1445    
# Line 1440  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1452  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1452      ptr += 2;      ptr += 2;
1453      if (ptr[1] != CHAR_QUESTION_MARK)      if (ptr[1] != CHAR_QUESTION_MARK)
1454        {        {
1455        while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;        while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1456        if (*ptr != 0) ptr++;        if (*ptr != CHAR_NULL) ptr++;
1457        }        }
1458      }      }
1459    
# Line 1484  for (; ptr < cd->end_pattern; ptr++) Line 1496  for (; ptr < cd->end_pattern; ptr++)
1496    
1497    if (*ptr == CHAR_BACKSLASH)    if (*ptr == CHAR_BACKSLASH)
1498      {      {
1499      if (*(++ptr) == 0) goto FAIL_EXIT;      if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT;
1500      if (*ptr == CHAR_Q) for (;;)      if (*ptr == CHAR_Q) for (;;)
1501        {        {
1502        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};        while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {};
1503        if (*ptr == 0) goto FAIL_EXIT;        if (*ptr == CHAR_NULL) goto FAIL_EXIT;
1504        if (*(++ptr) == CHAR_E) break;        if (*(++ptr) == CHAR_E) break;
1505        }        }
1506      continue;      continue;
# Line 1532  for (; ptr < cd->end_pattern; ptr++) Line 1544  for (; ptr < cd->end_pattern; ptr++)
1544    
1545      while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)      while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
1546        {        {
1547        if (*ptr == 0) return -1;        if (*ptr == CHAR_NULL) return -1;
1548        if (*ptr == CHAR_BACKSLASH)        if (*ptr == CHAR_BACKSLASH)
1549          {          {
1550          if (*(++ptr) == 0) goto FAIL_EXIT;          if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT;
1551          if (*ptr == CHAR_Q) for (;;)          if (*ptr == CHAR_Q) for (;;)
1552            {            {
1553            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};            while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {};
1554            if (*ptr == 0) goto FAIL_EXIT;            if (*ptr == CHAR_NULL) goto FAIL_EXIT;
1555            if (*(++ptr) == CHAR_E) break;            if (*(++ptr) == CHAR_E) break;
1556            }            }
1557          continue;          continue;
# Line 1553  for (; ptr < cd->end_pattern; ptr++) Line 1565  for (; ptr < cd->end_pattern; ptr++)
1565    if (xmode && *ptr == CHAR_NUMBER_SIGN)    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1566      {      {
1567      ptr++;      ptr++;
1568      while (*ptr != 0)      while (*ptr != CHAR_NULL)
1569        {        {
1570        if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }        if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
1571        ptr++;        ptr++;
# Line 1561  for (; ptr < cd->end_pattern; ptr++) Line 1573  for (; ptr < cd->end_pattern; ptr++)
1573        if (utf) FORWARDCHAR(ptr);        if (utf) FORWARDCHAR(ptr);
1574  #endif  #endif
1575        }        }
1576      if (*ptr == 0) goto FAIL_EXIT;      if (*ptr == CHAR_NULL) goto FAIL_EXIT;
1577      continue;      continue;
1578      }      }
1579    
# Line 1571  for (; ptr < cd->end_pattern; ptr++) Line 1583  for (; ptr < cd->end_pattern; ptr++)
1583      {      {
1584      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count);      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count);
1585      if (rc > 0) return rc;      if (rc > 0) return rc;
1586      if (*ptr == 0) goto FAIL_EXIT;      if (*ptr == CHAR_NULL) goto FAIL_EXIT;
1587      }      }
1588    
1589    else if (*ptr == CHAR_RIGHT_PARENTHESIS)    else if (*ptr == CHAR_RIGHT_PARENTHESIS)
# Line 1636  matching closing parens. That is why we Line 1648  matching closing parens. That is why we
1648  for (;;)  for (;;)
1649    {    {
1650    rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count);    rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count);
1651    if (rc > 0 || *ptr++ == 0) break;    if (rc > 0 || *ptr++ == CHAR_NULL) break;
1652    }    }
1653    
1654  return rc;  return rc;
# Line 1852  for (;;) Line 1864  for (;;)
1864      case OP_NOTI:      case OP_NOTI:
1865      branchlength++;      branchlength++;
1866      cc += 2;      cc += 2;
1867  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
1868      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1869  #endif  #endif
1870      break;      break;
# Line 1866  for (;;) Line 1878  for (;;)
1878      case OP_NOTEXACTI:      case OP_NOTEXACTI:
1879      branchlength += (int)GET2(cc,1);      branchlength += (int)GET2(cc,1);
1880      cc += 2 + IMM2_SIZE;      cc += 2 + IMM2_SIZE;
1881  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
1882      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1883  #endif  #endif
1884      break;      break;
1885    
1886      case OP_TYPEEXACT:      case OP_TYPEEXACT:
1887      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1888      if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP)      if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP)
1889        cc += 2;        cc += 2;
1890      cc += 1 + IMM2_SIZE + 1;      cc += 1 + IMM2_SIZE + 1;
1891      break;      break;
# Line 1909  for (;;) Line 1921  for (;;)
1921    
1922      /* Check a class for variable quantification */      /* Check a class for variable quantification */
1923    
 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
     case OP_XCLASS:  
     cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];  
     /* Fall through */  
 #endif  
   
1924      case OP_CLASS:      case OP_CLASS:
1925      case OP_NCLASS:      case OP_NCLASS:
1926    #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1927        case OP_XCLASS:
1928        /* The original code caused an unsigned overflow in 64 bit systems,
1929        so now we use a conditional statement. */
1930        if (op == OP_XCLASS)
1931          cc += GET(cc, 1);
1932        else
1933          cc += PRIV(OP_lengths)[OP_CLASS];
1934    #else
1935      cc += PRIV(OP_lengths)[OP_CLASS];      cc += PRIV(OP_lengths)[OP_CLASS];
1936    #endif
1937    
1938      switch (*cc)      switch (*cc)
1939        {        {
# Line 2112  for (;;) Line 2128  for (;;)
2128        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
2129        case OP_TYPEEXACT:        case OP_TYPEEXACT:
2130        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
2131        if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)        if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2132          code += 2;          code += 2;
2133        break;        break;
2134    
2135        case OP_MARK:        case OP_MARK:
2136        case OP_PRUNE_ARG:        case OP_PRUNE_ARG:
2137        case OP_SKIP_ARG:        case OP_SKIP_ARG:
       code += code[1];  
       break;  
   
2138        case OP_THEN_ARG:        case OP_THEN_ARG:
2139        code += code[1];        code += code[1];
2140        break;        break;
# Line 2232  for (;;) Line 2245  for (;;)
2245        case OP_TYPEUPTO:        case OP_TYPEUPTO:
2246        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
2247        case OP_TYPEEXACT:        case OP_TYPEEXACT:
2248        if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)        if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2249          code += 2;          code += 2;
2250        break;        break;
2251    
2252        case OP_MARK:        case OP_MARK:
2253        case OP_PRUNE_ARG:        case OP_PRUNE_ARG:
2254        case OP_SKIP_ARG:        case OP_SKIP_ARG:
       code += code[1];  
       break;  
   
2255        case OP_THEN_ARG:        case OP_THEN_ARG:
2256        code += code[1];        code += code[1];
2257        break;        break;
# Line 2343  Arguments: Line 2353  Arguments:
2353    endcode     points to where to stop    endcode     points to where to stop
2354    utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode    utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
2355    cd          contains pointers to tables etc.    cd          contains pointers to tables etc.
2356      recurses    chain of recurse_check to catch mutual recursion
2357    
2358  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
2359  */  */
2360    
2361    typedef struct recurse_check {
2362      struct recurse_check *prev;
2363      const pcre_uchar *group;
2364    } recurse_check;
2365    
2366  static BOOL  static BOOL
2367  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2368    BOOL utf, compile_data *cd)    BOOL utf, compile_data *cd, recurse_check *recurses)
2369  {  {
2370  register pcre_uchar c;  register pcre_uchar c;
2371    recurse_check this_recurse;
2372    
2373  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
2374       code < endcode;       code < endcode;
2375       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
# Line 2359  for (code = first_significant_code(code Line 2377  for (code = first_significant_code(code
2377    const pcre_uchar *ccode;    const pcre_uchar *ccode;
2378    
2379    c = *code;    c = *code;
2380    
2381    /* Skip over forward assertions; the other assertions are skipped by    /* Skip over forward assertions; the other assertions are skipped by
2382    first_significant_code() with a TRUE final argument. */    first_significant_code() with a TRUE final argument. */
2383    
# Line 2379  for (code = first_significant_code(code Line 2397  for (code = first_significant_code(code
2397    
2398    if (c == OP_RECURSE)    if (c == OP_RECURSE)
2399      {      {
2400      const pcre_uchar *scode;      const pcre_uchar *scode = cd->start_code + GET(code, 1);
2401      BOOL empty_branch;      BOOL empty_branch;
2402    
2403      /* Test for forward reference */      /* Test for forward reference or uncompleted reference. This is disabled
2404        when called to scan a completed pattern by setting cd->start_workspace to
2405      for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)      NULL. */
2406        if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;  
2407        if (cd->start_workspace != NULL)
2408          {
2409          const pcre_uchar *tcode;
2410          for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
2411            if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
2412          if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
2413          }
2414    
2415        /* If we are scanning a completed pattern, there are no forward references
2416        and all groups are complete. We need to detect whether this is a recursive
2417        call, as otherwise there will be an infinite loop. If it is a recursion,
2418        just skip over it. Simple recursions are easily detected. For mutual
2419        recursions we keep a chain on the stack. */
2420    
2421        else
2422          {
2423          recurse_check *r = recurses;
2424          const pcre_uchar *endgroup = scode;
2425    
2426          do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
2427          if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
2428    
2429          for (r = recurses; r != NULL; r = r->prev)
2430            if (r->group == scode) break;
2431          if (r != NULL) continue;   /* Mutual recursion */
2432          }
2433    
2434      /* Not a forward reference, test for completed backward reference */      /* Completed reference; scan the referenced group, remembering it on the
2435        stack chain to detect mutual recursions. */
2436    
2437      empty_branch = FALSE;      empty_branch = FALSE;
2438      scode = cd->start_code + GET(code, 1);      this_recurse.prev = recurses;
2439      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */      this_recurse.group = scode;
2440    
     /* Completed backwards reference */  
   
2441      do      do
2442        {        {
2443        if (could_be_empty_branch(scode, endcode, utf, cd))        if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
2444          {          {
2445          empty_branch = TRUE;          empty_branch = TRUE;
2446          break;          break;
# Line 2453  for (code = first_significant_code(code Line 2496  for (code = first_significant_code(code
2496        empty_branch = FALSE;        empty_branch = FALSE;
2497        do        do
2498          {          {
2499          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
2500            empty_branch = TRUE;            empty_branch = TRUE;
2501          code += GET(code, 1);          code += GET(code, 1);
2502          }          }
# Line 2511  for (code = first_significant_code(code Line 2554  for (code = first_significant_code(code
2554    
2555      /* Opcodes that must match a character */      /* Opcodes that must match a character */
2556    
2557        case OP_ANY:
2558        case OP_ALLANY:
2559        case OP_ANYBYTE:
2560    
2561      case OP_PROP:      case OP_PROP:
2562      case OP_NOTPROP:      case OP_NOTPROP:
2563        case OP_ANYNL:
2564    
2565        case OP_NOT_HSPACE:
2566        case OP_HSPACE:
2567        case OP_NOT_VSPACE:
2568        case OP_VSPACE:
2569      case OP_EXTUNI:      case OP_EXTUNI:
2570    
2571      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
2572      case OP_DIGIT:      case OP_DIGIT:
2573      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
2574      case OP_WHITESPACE:      case OP_WHITESPACE:
2575      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
2576      case OP_WORDCHAR:      case OP_WORDCHAR:
2577      case OP_ANY:  
     case OP_ALLANY:  
     case OP_ANYBYTE:  
2578      case OP_CHAR:      case OP_CHAR:
2579      case OP_CHARI:      case OP_CHARI:
2580      case OP_NOT:      case OP_NOT:
2581      case OP_NOTI:      case OP_NOTI:
2582    
2583      case OP_PLUS:      case OP_PLUS:
2584        case OP_PLUSI:
2585      case OP_MINPLUS:      case OP_MINPLUS:
2586      case OP_POSPLUS:      case OP_MINPLUSI:
2587      case OP_EXACT:  
2588      case OP_NOTPLUS:      case OP_NOTPLUS:
2589        case OP_NOTPLUSI:
2590      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
2591        case OP_NOTMINPLUSI:
2592    
2593        case OP_POSPLUS:
2594        case OP_POSPLUSI:
2595      case OP_NOTPOSPLUS:      case OP_NOTPOSPLUS:
2596        case OP_NOTPOSPLUSI:
2597    
2598        case OP_EXACT:
2599        case OP_EXACTI:
2600      case OP_NOTEXACT:      case OP_NOTEXACT:
2601        case OP_NOTEXACTI:
2602    
2603      case OP_TYPEPLUS:      case OP_TYPEPLUS:
2604      case OP_TYPEMINPLUS:      case OP_TYPEMINPLUS:
2605      case OP_TYPEPOSPLUS:      case OP_TYPEPOSPLUS:
2606      case OP_TYPEEXACT:      case OP_TYPEEXACT:
2607    
2608      return FALSE;      return FALSE;
2609    
2610      /* These are going to continue, as they may be empty, but we have to      /* These are going to continue, as they may be empty, but we have to
# Line 2558  for (code = first_significant_code(code Line 2624  for (code = first_significant_code(code
2624      case OP_TYPEUPTO:      case OP_TYPEUPTO:
2625      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
2626      case OP_TYPEPOSUPTO:      case OP_TYPEPOSUPTO:
2627      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2628        code += 2;        code += 2;
2629      break;      break;
2630    
# Line 2572  for (code = first_significant_code(code Line 2638  for (code = first_significant_code(code
2638      return TRUE;      return TRUE;
2639    
2640      /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,      /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
2641      MINUPTO, and POSUPTO may be followed by a multibyte character */      MINUPTO, and POSUPTO and their caseless and negative versions may be
2642        followed by a multibyte character. */
2643    
2644  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2645      case OP_STAR:      case OP_STAR:
2646      case OP_STARI:      case OP_STARI:
2647        case OP_NOTSTAR:
2648        case OP_NOTSTARI:
2649    
2650      case OP_MINSTAR:      case OP_MINSTAR:
2651      case OP_MINSTARI:      case OP_MINSTARI:
2652        case OP_NOTMINSTAR:
2653        case OP_NOTMINSTARI:
2654    
2655      case OP_POSSTAR:      case OP_POSSTAR:
2656      case OP_POSSTARI:      case OP_POSSTARI:
2657        case OP_NOTPOSSTAR:
2658        case OP_NOTPOSSTARI:
2659    
2660      case OP_QUERY:      case OP_QUERY:
2661      case OP_QUERYI:      case OP_QUERYI:
2662        case OP_NOTQUERY:
2663        case OP_NOTQUERYI:
2664    
2665      case OP_MINQUERY:      case OP_MINQUERY:
2666      case OP_MINQUERYI:      case OP_MINQUERYI:
2667        case OP_NOTMINQUERY:
2668        case OP_NOTMINQUERYI:
2669    
2670      case OP_POSQUERY:      case OP_POSQUERY:
2671      case OP_POSQUERYI:      case OP_POSQUERYI:
2672        case OP_NOTPOSQUERY:
2673        case OP_NOTPOSQUERYI:
2674    
2675      if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);      if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
2676      break;      break;
2677    
2678      case OP_UPTO:      case OP_UPTO:
2679      case OP_UPTOI:      case OP_UPTOI:
2680        case OP_NOTUPTO:
2681        case OP_NOTUPTOI:
2682    
2683      case OP_MINUPTO:      case OP_MINUPTO:
2684      case OP_MINUPTOI:      case OP_MINUPTOI:
2685        case OP_NOTMINUPTO:
2686        case OP_NOTMINUPTOI:
2687    
2688      case OP_POSUPTO:      case OP_POSUPTO:
2689      case OP_POSUPTOI:      case OP_POSUPTOI:
2690        case OP_NOTPOSUPTO:
2691        case OP_NOTPOSUPTOI:
2692    
2693      if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);      if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
2694      break;      break;
2695  #endif  #endif
# Line 2606  for (code = first_significant_code(code Line 2700  for (code = first_significant_code(code
2700      case OP_MARK:      case OP_MARK:
2701      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
2702      case OP_SKIP_ARG:      case OP_SKIP_ARG:
     code += code[1];  
     break;  
   
2703      case OP_THEN_ARG:      case OP_THEN_ARG:
2704      code += code[1];      code += code[1];
2705      break;      break;
# Line 2652  could_be_empty(const pcre_uchar *code, c Line 2743  could_be_empty(const pcre_uchar *code, c
2743  {  {
2744  while (bcptr != NULL && bcptr->current_branch >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2745    {    {
2746    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
2747      return FALSE;      return FALSE;
2748    bcptr = bcptr->outer;    bcptr = bcptr->outer;
2749    }    }
# Line 2706  Returns:   TRUE or FALSE Line 2797  Returns:   TRUE or FALSE
2797  static BOOL  static BOOL
2798  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
2799  {  {
2800  int terminator;          /* Don't combine these lines; the Solaris cc */  pcre_uchar terminator;          /* Don't combine these lines; the Solaris cc */
2801  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
2802  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != CHAR_NULL; ptr++)
2803    {    {
2804    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2805      ptr++;      ptr++;
# Line 2755  register int yield = 0; Line 2846  register int yield = 0;
2846  while (posix_name_lengths[yield] != 0)  while (posix_name_lengths[yield] != 0)
2847    {    {
2848    if (len == posix_name_lengths[yield] &&    if (len == posix_name_lengths[yield] &&
2849      STRNCMP_UC_C8(ptr, pn, len) == 0) return yield;      STRNCMP_UC_C8(ptr, pn, (unsigned int)len) == 0) return yield;
2850    pn += posix_name_lengths[yield] + 1;    pn += posix_name_lengths[yield] + 1;
2851    yield++;    yield++;
2852    }    }
# Line 2810  while ((ptr = (pcre_uchar *)find_recurse Line 2901  while ((ptr = (pcre_uchar *)find_recurse
2901    
2902    for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)    for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)
2903      {      {
2904      offset = GET(hc, 0);      offset = (int)GET(hc, 0);
2905      if (cd->start_code + offset == ptr + 1)      if (cd->start_code + offset == ptr + 1)
2906        {        {
2907        PUT(hc, 0, offset + adjust);        PUT(hc, 0, offset + adjust);
# Line 2823  while ((ptr = (pcre_uchar *)find_recurse Line 2914  while ((ptr = (pcre_uchar *)find_recurse
2914    
2915    if (hc >= cd->hwm)    if (hc >= cd->hwm)
2916      {      {
2917      offset = GET(ptr, 1);      offset = (int)GET(ptr, 1);
2918      if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);      if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);
2919      }      }
2920    
# Line 2893  PUT(previous_callout, 2 + LINK_SIZE, len Line 2984  PUT(previous_callout, 2 + LINK_SIZE, len
2984  /* This function is passed the start and end of a class range, in UTF-8 mode  /* This function is passed the start and end of a class range, in UTF-8 mode
2985  with UCP support. It searches up the characters, looking for ranges of  with UCP support. It searches up the characters, looking for ranges of
2986  characters in the "other" case. Each call returns the next one, updating the  characters in the "other" case. Each call returns the next one, updating the
2987  start address. A character with multiple other cases is returned on its own  start address. A character with multiple other cases is returned on its own
2988  with a special return value.  with a special return value.
2989    
2990  Arguments:  Arguments:
# Line 2905  Arguments: Line 2996  Arguments:
2996  Yield:        -1 when no more  Yield:        -1 when no more
2997                 0 when a range is returned                 0 when a range is returned
2998                >0 the CASESET offset for char with multiple other cases                >0 the CASESET offset for char with multiple other cases
2999                  in this case, ocptr contains the original                  in this case, ocptr contains the original
3000  */  */
3001    
3002  static int  static int
# Line 2913  get_othercase_range(pcre_uint32 *cptr, p Line 3004  get_othercase_range(pcre_uint32 *cptr, p
3004    pcre_uint32 *odptr)    pcre_uint32 *odptr)
3005  {  {
3006  pcre_uint32 c, othercase, next;  pcre_uint32 c, othercase, next;
3007  int co;  unsigned int co;
3008    
3009  /* Find the first character that has an other case. If it has multiple other  /* Find the first character that has an other case. If it has multiple other
3010  cases, return its case offset value. */  cases, return its case offset value. */
3011    
3012  for (c = *cptr; c <= d; c++)  for (c = *cptr; c <= d; c++)
3013    {    {
3014    if ((co = UCD_CASESET(c)) != 0)    if ((co = UCD_CASESET(c)) != 0)
3015      {      {
3016      *ocptr = c++;   /* Character that has the set */      *ocptr = c++;   /* Character that has the set */
3017      *cptr = c;      /* Rest of input range */      *cptr = c;      /* Rest of input range */
3018      return co;      return (int)co;
3019      }      }
3020    if ((othercase = UCD_OTHERCASE(c)) != c) break;    if ((othercase = UCD_OTHERCASE(c)) != c) break;
3021    }    }
3022    
3023  if (c > d) return -1;  /* Reached end of range */  if (c > d) return -1;  /* Reached end of range */
# Line 2964  Returns:       TRUE if auto-possessifyin Line 3055  Returns:       TRUE if auto-possessifyin
3055  */  */
3056    
3057  static BOOL  static BOOL
3058  check_char_prop(pcre_uint32 c, int ptype, int pdata, BOOL negated)  check_char_prop(pcre_uint32 c, unsigned int ptype, unsigned int pdata, BOOL negated)
3059  {  {
3060  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3061  const pcre_uint32 *p;  const pcre_uint32 *p;
# Line 3009  switch(ptype) Line 3100  switch(ptype)
3100    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
3101            PRIV(ucp_gentype)[prop->chartype] == ucp_N ||            PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
3102            c == CHAR_UNDERSCORE) == negated;            c == CHAR_UNDERSCORE) == negated;
3103    
3104  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3105    case PT_CLIST:    case PT_CLIST:
3106    p = PRIV(ucd_caseless_sets) + prop->caseset;    p = PRIV(ucd_caseless_sets) + prop->caseset;
3107    for (;;)    for (;;)
3108      {      {
3109      if ((unsigned int)c < *p) return !negated;      if (c < *p) return !negated;
3110      if ((unsigned int)c == *p++) return negated;      if (c == *p++) return negated;
3111      }      }
3112    break;  /* Control never reaches here */    break;  /* Control never reaches here */
3113  #endif  #endif
# Line 3053  check_auto_possessive(const pcre_uchar * Line 3144  check_auto_possessive(const pcre_uchar *
3144  pcre_uint32 c = NOTACHAR;  pcre_uint32 c = NOTACHAR;
3145  pcre_uint32 next;  pcre_uint32 next;
3146  int escape;  int escape;
3147  int op_code = *previous++;  pcre_uchar op_code = *previous++;
3148    
3149  /* Skip whitespace and comments in extended mode */  /* Skip whitespace and comments in extended mode */
3150    
# Line 3065  if ((options & PCRE_EXTENDED) != 0) Line 3156  if ((options & PCRE_EXTENDED) != 0)
3156      if (*ptr == CHAR_NUMBER_SIGN)      if (*ptr == CHAR_NUMBER_SIGN)
3157        {        {
3158        ptr++;        ptr++;
3159        while (*ptr != 0)        while (*ptr != CHAR_NULL)
3160          {          {
3161          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
3162          ptr++;          ptr++;
# Line 3084  value is a character, a negative value i Line 3175  value is a character, a negative value i
3175  if (*ptr == CHAR_BACKSLASH)  if (*ptr == CHAR_BACKSLASH)
3176    {    {
3177    int temperrorcode = 0;    int temperrorcode = 0;
3178    escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE);    escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options,
3179        FALSE);
3180    if (temperrorcode != 0) return FALSE;    if (temperrorcode != 0) return FALSE;
3181    ptr++;    /* Point after the escape sequence */    ptr++;    /* Point after the escape sequence */
3182    }    }
# Line 3108  if ((options & PCRE_EXTENDED) != 0) Line 3200  if ((options & PCRE_EXTENDED) != 0)
3200      if (*ptr == CHAR_NUMBER_SIGN)      if (*ptr == CHAR_NUMBER_SIGN)
3201        {        {
3202        ptr++;        ptr++;
3203        while (*ptr != 0)        while (*ptr != CHAR_NULL)
3204          {          {
3205          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
3206          ptr++;          ptr++;
# Line 3129  if (*ptr == CHAR_ASTERISK || *ptr == CHA Line 3221  if (*ptr == CHAR_ASTERISK || *ptr == CHA
3221    
3222  /* If the previous item is a character, get its value. */  /* If the previous item is a character, get its value. */
3223    
3224  if (op_code == OP_CHAR || op_code == OP_CHARI ||  if (op_code == OP_CHAR || op_code == OP_CHARI ||
3225      op_code == OP_NOT || op_code == OP_NOTI)      op_code == OP_NOT || op_code == OP_NOTI)
   //if (escape == 0) switch(op_code)  
3226    {    {
3227  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3228    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
# Line 3147  if (escape == 0) Line 3238  if (escape == 0)
3238    {    {
3239    /* For a caseless UTF match, the next character may have more than one other    /* For a caseless UTF match, the next character may have more than one other
3240    case, which maps to the special PT_CLIST property. Check this first. */    case, which maps to the special PT_CLIST property. Check this first. */
3241    
3242  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3243    if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0)    if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0)
3244      {      {
3245      int ocs = UCD_CASESET(next);      unsigned int ocs = UCD_CASESET(next);
3246      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);
3247      }      }
3248  #endif  #endif
# Line 3163  if (escape == 0) Line 3254  if (escape == 0)
3254    
3255      /* For CHARI (caseless character) we must check the other case. If we have      /* For CHARI (caseless character) we must check the other case. If we have
3256      Unicode property support, we can use it to test the other case of      Unicode property support, we can use it to test the other case of
3257      high-valued characters. We know that next can have only one other case,      high-valued characters. We know that next can have only one other case,
3258      because multi-other-case characters are dealt with above. */      because multi-other-case characters are dealt with above. */
3259    
3260      case OP_CHARI:      case OP_CHARI:
# Line 3183  if (escape == 0) Line 3274  if (escape == 0)
3274      else      else
3275  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3276      return (c != TABLE_GET(next, cd->fcc, next));  /* Not UTF */      return (c != TABLE_GET(next, cd->fcc, next));  /* Not UTF */
3277    
3278      case OP_NOT:      case OP_NOT:
3279      return c == next;      return c == next;
3280    
3281      case OP_NOTI:      case OP_NOTI:
3282      if (c == next) return TRUE;      if (c == next) return TRUE;
3283  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 3230  if (escape == 0) Line 3321  if (escape == 0)
3321      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
3322      switch(next)      switch(next)
3323        {        {
3324        HSPACE_CASES:        HSPACE_CASES:
3325        return op_code == OP_NOT_HSPACE;        return op_code == OP_NOT_HSPACE;
3326    
3327        default:        default:
# Line 3242  if (escape == 0) Line 3333  if (escape == 0)
3333      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
3334      switch(next)      switch(next)
3335        {        {
3336        VSPACE_CASES:        VSPACE_CASES:
3337        return op_code == OP_NOT_VSPACE;        return op_code == OP_NOT_VSPACE;
3338    
3339        default:        default:
# Line 3296  switch(op_code) Line 3387  switch(op_code)
3387      case ESC_H:      case ESC_H:
3388      switch(c)      switch(c)
3389        {        {
3390        HSPACE_CASES:        HSPACE_CASES:
3391        return escape != ESC_h;        return escape != ESC_h;
3392    
3393        default:        default:
3394        return escape == ESC_h;        return escape == ESC_h;
3395        }        }
# Line 3307  switch(op_code) Line 3398  switch(op_code)
3398      case ESC_V:      case ESC_V:
3399      switch(c)      switch(c)
3400        {        {
3401        VSPACE_CASES:        VSPACE_CASES:
3402        return escape != ESC_v;        return escape != ESC_v;
3403    
3404        default:        default:
# Line 3337  switch(op_code) Line 3428  switch(op_code)
3428      case ESC_p:      case ESC_p:
3429      case ESC_P:      case ESC_P:
3430        {        {
3431        int ptype, pdata, errorcodeptr;        unsigned int ptype = 0, pdata = 0;
3432          int errorcodeptr;
3433        BOOL negated;        BOOL negated;
3434    
3435        ptr--;      /* Make ptr point at the p or P */        ptr--;      /* Make ptr point at the p or P */
3436        ptype = get_ucp(&ptr, &negated, &pdata, &errorcodeptr);        if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcodeptr))
3437        if (ptype < 0) return FALSE;          return FALSE;
3438        ptr++;      /* Point past the final curly ket */        ptr++;      /* Point past the final curly ket */
3439    
3440        /* If the property item is optional, we have to give up. (When generated        /* If the property item is optional, we have to give up. (When generated
# Line 3418  switch(op_code) Line 3510  switch(op_code)
3510  *************************************************/  *************************************************/
3511    
3512  /* This function packages up the logic of adding a character or range of  /* This function packages up the logic of adding a character or range of
3513  characters to a class. The character values in the arguments will be within the  characters to a class. The character values in the arguments will be within the
3514  valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is  valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
3515  mutually recursive with the function immediately below.  mutually recursive with the function immediately below.
3516    
3517  Arguments:  Arguments:
3518    classbits     the bit map for characters < 256    classbits     the bit map for characters < 256
3519    uchardptr     points to the pointer for extra data    uchardptr     points to the pointer for extra data
3520    options       the options word    options       the options word
3521    cd            contains pointers to tables etc.    cd            contains pointers to tables etc.
3522    start         start of range character    start         start of range character
3523    end           end of range character    end           end of range character
3524    
3525  Returns:        the number of < 256 characters added  Returns:        the number of < 256 characters added
3526                  the pointer to extra data is updated                  the pointer to extra data is updated
3527  */  */
# Line 3441  add_to_class(pcre_uint8 *classbits, pcre Line 3533  add_to_class(pcre_uint8 *classbits, pcre
3533  pcre_uint32 c;  pcre_uint32 c;
3534  int n8 = 0;  int n8 = 0;
3535    
3536  /* If caseless matching is required, scan the range and process alternate  /* If caseless matching is required, scan the range and process alternate
3537  cases. In Unicode, there are 8-bit characters that have alternate cases that  cases. In Unicode, there are 8-bit characters that have alternate cases that
3538  are greater than 255 and vice-versa. Sometimes we can just extend the original  are greater than 255 and vice-versa. Sometimes we can just extend the original
3539  range. */  range. */
3540    
3541  if ((options & PCRE_CASELESS) != 0)  if ((options & PCRE_CASELESS) != 0)
3542    {    {
3543  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3544    if ((options & PCRE_UTF8) != 0)    if ((options & PCRE_UTF8) != 0)
3545      {      {
3546      int rc;      int rc;
3547      pcre_uint32 oc, od;      pcre_uint32 oc, od;
3548    
3549      options &= ~PCRE_CASELESS;   /* Remove for recursive calls */      options &= ~PCRE_CASELESS;   /* Remove for recursive calls */
3550      c = start;      c = start;
3551    
3552      while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)      while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
3553        {        {
3554        /* Handle a single character that has more than one other case. */        /* Handle a single character that has more than one other case. */
3555    
3556        if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cd,        if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cd,
3557          PRIV(ucd_caseless_sets) + rc, oc);          PRIV(ucd_caseless_sets) + rc, oc);
3558    
3559        /* Do nothing if the other case range is within the original range. */        /* Do nothing if the other case range is within the original range. */
3560    
3561        else if (oc >= start && od <= end) continue;        else if (oc >= start && od <= end) continue;
3562    
3563        /* Extend the original range if there is overlap, noting that if oc < c, we        /* Extend the original range if there is overlap, noting that if oc < c, we
3564        can't have od > end because a subrange is always shorter than the basic        can't have od > end because a subrange is always shorter than the basic
3565        range. Otherwise, use a recursive call to add the additional range. */        range. Otherwise, use a recursive call to add the additional range. */
3566    
3567        else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */        else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
3568        else if (od > end && oc <= end + 1) end = od;       /* Extend upwards */        else if (od > end && oc <= end + 1) end = od;       /* Extend upwards */
3569        else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od);        else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od);
# Line 3481  if ((options & PCRE_CASELESS) != 0) Line 3573  if ((options & PCRE_CASELESS) != 0)
3573  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3574    
3575    /* Not UTF-mode, or no UCP */    /* Not UTF-mode, or no UCP */
3576    
3577    for (c = start; c <= end && c < 256; c++)    for (c = start; c <= end && c < 256; c++)
3578      {      {
3579      SETBIT(classbits, cd->fcc[c]);      SETBIT(classbits, cd->fcc[c]);
3580      n8++;      n8++;
3581      }      }
3582    }    }
3583    
3584  /* Now handle the original range. Adjust the final value according to the bit  /* Now handle the original range. Adjust the final value according to the bit
3585  length - this means that the same lists of (e.g.) horizontal spaces can be used  length - this means that the same lists of (e.g.) horizontal spaces can be used
3586  in all cases. */  in all cases. */
# Line 3514  if (end < 0x100) Line 3606  if (end < 0x100)
3606    {    {
3607    for (c = start; c <= end; c++)    for (c = start; c <= end; c++)
3608      {      {
3609      n8++;      n8++;
3610      SETBIT(classbits, c);      SETBIT(classbits, c);
3611      }      }
3612    }    }
3613    
3614  else  else
3615    {    {
3616    pcre_uchar *uchardata = *uchardptr;    pcre_uchar *uchardata = *uchardptr;
3617    
3618  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3619    if ((options & PCRE_UTF8) != 0)  /* All UTFs use the same flag bit */    if ((options & PCRE_UTF8) != 0)  /* All UTFs use the same flag bit */
3620      {      {
3621      if (start < end)      if (start < end)
3622        {        {
3623        *uchardata++ = XCL_RANGE;        *uchardata++ = XCL_RANGE;
3624        uchardata += PRIV(ord2utf)(start, uchardata);        uchardata += PRIV(ord2utf)(start, uchardata);
3625        uchardata += PRIV(ord2utf)(end, uchardata);        uchardata += PRIV(ord2utf)(end, uchardata);
3626        }        }
3627      else if (start == end)      else if (start == end)
3628        {        {
3629        *uchardata++ = XCL_SINGLE;        *uchardata++ = XCL_SINGLE;
3630        uchardata += PRIV(ord2utf)(start, uchardata);        uchardata += PRIV(ord2utf)(start, uchardata);
3631        }        }
3632      }      }
3633    else    else
3634  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3635    
3636    /* Without UTF support, character values are constrained by the bit length,    /* Without UTF support, character values are constrained by the bit length,
3637    and can only be > 256 for 16-bit and 32-bit libraries. */    and can only be > 256 for 16-bit and 32-bit libraries. */
3638    
3639  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
3640      {}      {}
3641  #else  #else
3642    if (start < end)    if (start < end)
3643      {      {
3644      *uchardata++ = XCL_RANGE;      *uchardata++ = XCL_RANGE;
# Line 3557  else Line 3649  else
3649      {      {
3650      *uchardata++ = XCL_SINGLE;      *uchardata++ = XCL_SINGLE;
3651      *uchardata++ = start;      *uchardata++ = start;
3652      }      }
3653  #endif  #endif
3654    
3655    *uchardptr = uchardata;   /* Update extra data pointer */    *uchardptr = uchardata;   /* Update extra data pointer */
3656    }    }
3657    
3658  return n8;    /* Number of 8-bit characters */  return n8;    /* Number of 8-bit characters */
3659  }  }
3660    
3661    
3662    
3663    
3664  /*************************************************  /*************************************************
3665  *        Add a list of characters to a class     *  *        Add a list of characters to a class     *
3666  *************************************************/  *************************************************/
3667    
3668  /* This function is used for adding a list of case-equivalent characters to a  /* This function is used for adding a list of case-equivalent characters to a
3669  class, and also for adding a list of horizontal or vertical whitespace. If the  class, and also for adding a list of horizontal or vertical whitespace. If the
3670  list is in order (which it should be), ranges of characters are detected and  list is in order (which it should be), ranges of characters are detected and
3671  handled appropriately. This function is mutually recursive with the function  handled appropriately. This function is mutually recursive with the function
# Line 3583  Arguments: Line 3675  Arguments:
3675    classbits     the bit map for characters < 256    classbits     the bit map for characters < 256
3676    uchardptr     points to the pointer for extra data    uchardptr     points to the pointer for extra data
3677    options       the options word    options       the options word
3678    cd            contains pointers to tables etc.    cd            contains pointers to tables etc.
3679    p             points to row of 32-bit values, terminated by NOTACHAR    p             points to row of 32-bit values, terminated by NOTACHAR
3680    except        character to omit; this is used when adding lists of    except        character to omit; this is used when adding lists of
3681                    case-equivalent characters to avoid including the one we                    case-equivalent characters to avoid including the one we
3682                    already know about                    already know about
3683    
3684  Returns:        the number of < 256 characters added  Returns:        the number of < 256 characters added
3685                  the pointer to extra data is updated                  the pointer to extra data is updated
3686  */  */
# Line 3602  while (p[0] < NOTACHAR) Line 3694  while (p[0] < NOTACHAR)
3694    {    {
3695    int n = 0;    int n = 0;
3696    if (p[0] != except)    if (p[0] != except)
3697      {      {
3698      while(p[n+1] == p[0] + n + 1) n++;      while(p[n+1] == p[0] + n + 1) n++;
3699      n8 += add_to_class(classbits, uchardptr, options, cd, p[0], p[n]);      n8 += add_to_class(classbits, uchardptr, options, cd, p[0], p[n]);
3700      }      }
3701    p += n + 1;    p += n + 1;
3702    }    }
3703  return n8;  return n8;
3704  }  }
3705    
3706    
3707    
# Line 3624  Arguments: Line 3716  Arguments:
3716    classbits     the bit map for characters < 256    classbits     the bit map for characters < 256
3717    uchardptr     points to the pointer for extra data    uchardptr     points to the pointer for extra data
3718    options       the options word    options       the options word
3719    cd            contains pointers to tables etc.    cd            contains pointers to tables etc.
3720    p             points to row of 32-bit values, terminated by NOTACHAR    p             points to row of 32-bit values, terminated by NOTACHAR
3721    
3722  Returns:        the number of < 256 characters added  Returns:        the number of < 256 characters added
3723                  the pointer to extra data is updated                  the pointer to extra data is updated
3724  */  */
3725    
3726  static int  static int
3727  add_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr,  add_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr,
3728    int options, compile_data *cd, const pcre_uint32 *p)    int options, compile_data *cd, const pcre_uint32 *p)
3729  {  {
3730  BOOL utf = (options & PCRE_UTF8) != 0;  BOOL utf = (options & PCRE_UTF8) != 0;
# Line 3644  while (p[0] < NOTACHAR) Line 3736  while (p[0] < NOTACHAR)
3736    while (p[1] == p[0] + 1) p++;    while (p[1] == p[0] + 1) p++;
3737    n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1,    n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1,
3738      (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);      (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
3739    p++;    p++;
3740    }    }
3741  return n8;  return n8;
3742  }  }
3743    
3744    
3745    
# Line 3666  Arguments: Line 3758  Arguments:
3758    codeptr        points to the pointer to the current code point    codeptr        points to the pointer to the current code point
3759    ptrptr         points to the current pattern pointer    ptrptr         points to the current pattern pointer
3760    errorcodeptr   points to error code variable    errorcodeptr   points to error code variable
3761    firstcharptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)    firstcharptr    place to put the first required character
3762    reqcharptr     set to the last literal character required, else < 0    firstcharflagsptr place to put the first character flags, or a negative number
3763      reqcharptr     place to put the last required character
3764      reqcharflagsptr place to put the last required character flags, or a negative number
3765    bcptr          points to current branch chain    bcptr          points to current branch chain
3766    cond_depth     conditional nesting depth    cond_depth     conditional nesting depth
3767    cd             contains pointers to tables etc.    cd             contains pointers to tables etc.
# Line 3680  Returns:         TRUE on success Line 3774  Returns:         TRUE on success
3774    
3775  static BOOL  static BOOL
3776  compile_branch(int *optionsptr, pcre_uchar **codeptr,  compile_branch(int *optionsptr, pcre_uchar **codeptr,
3777    const pcre_uchar **ptrptr, int *errorcodeptr, pcre_int32 *firstcharptr,    const pcre_uchar **ptrptr, int *errorcodeptr,
3778    pcre_int32 *reqcharptr, branch_chain *bcptr, int cond_depth,    pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr,
3779      pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr,
3780      branch_chain *bcptr, int cond_depth,
3781    compile_data *cd, int *lengthptr)    compile_data *cd, int *lengthptr)
3782  {  {
3783  int repeat_type, op_type;  int repeat_type, op_type;
3784  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
3785  int bravalue = 0;  int bravalue = 0;
3786  int greedy_default, greedy_non_default;  int greedy_default, greedy_non_default;
3787  pcre_int32 firstchar, reqchar;  pcre_uint32 firstchar, reqchar;
3788  pcre_int32 zeroreqchar, zerofirstchar;  pcre_int32 firstcharflags, reqcharflags;
3789    pcre_uint32 zeroreqchar, zerofirstchar;
3790    pcre_int32 zeroreqcharflags, zerofirstcharflags;
3791  pcre_int32 req_caseopt, reqvary, tempreqvary;  pcre_int32 req_caseopt, reqvary, tempreqvary;
3792  int options = *optionsptr;               /* May change dynamically */  int options = *optionsptr;               /* May change dynamically */
3793  int after_manual_callout = 0;  int after_manual_callout = 0;
# Line 3717  dynamically as we process the pattern. * Line 3815  dynamically as we process the pattern. *
3815  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3816  /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */  /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
3817  BOOL utf = (options & PCRE_UTF8) != 0;  BOOL utf = (options & PCRE_UTF8) != 0;
3818    #ifndef COMPILE_PCRE32
3819  pcre_uchar utf_chars[6];  pcre_uchar utf_chars[6];
3820    #endif
3821  #else  #else
3822  BOOL utf = FALSE;  BOOL utf = FALSE;
3823  #endif  #endif
3824    
3825  /* Helper variables for OP_XCLASS opcode (for characters > 255). We define  /* Helper variables for OP_XCLASS opcode (for characters > 255). We define
3826  class_uchardata always so that it can be passed to add_to_class() always,  class_uchardata always so that it can be passed to add_to_class() always,
3827  though it will not be used in non-UTF 8-bit cases. This avoids having to supply  though it will not be used in non-UTF 8-bit cases. This avoids having to supply
3828  alternative calls for the different cases. */  alternative calls for the different cases. */
3829    
3830  pcre_uchar *class_uchardata;  pcre_uchar *class_uchardata;
# Line 3752  to take the zero repeat into account. Th Line 3852  to take the zero repeat into account. Th
3852  zerofirstchar and zeroreqchar when such a repeat is encountered. The individual  zerofirstchar and zeroreqchar when such a repeat is encountered. The individual
3853  item types that can be repeated set these backoff variables appropriately. */  item types that can be repeated set these backoff variables appropriately. */
3854    
3855  firstchar = reqchar = zerofirstchar = zeroreqchar = REQ_UNSET;  firstchar = reqchar = zerofirstchar = zeroreqchar = 0;
3856    firstcharflags = reqcharflags = zerofirstcharflags = zeroreqcharflags = REQ_UNSET;
3857    
3858  /* The variable req_caseopt contains either the REQ_CASELESS value  /* The variable req_caseopt contains either the REQ_CASELESS value
3859  or zero, according to the current setting of the caseless flag. The  or zero, according to the current setting of the caseless flag. The
# Line 3778  for (;; ptr++) Line 3879  for (;; ptr++)
3879    int recno;    int recno;
3880    int refsign;    int refsign;
3881    int skipbytes;    int skipbytes;
3882    int subreqchar;    pcre_uint32 subreqchar, subfirstchar;
3883    int subfirstchar;    pcre_int32 subreqcharflags, subfirstcharflags;
3884    int terminator;    int terminator;
3885    int mclength;    unsigned int mclength;
3886    int tempbracount;    unsigned int tempbracount;
3887    int ec; // FIXMEchpe pcre_uint32    pcre_uint32 ec;
3888    pcre_uchar mcbuffer[8];    pcre_uchar mcbuffer[8];
3889    
3890    /* Get next character in the pattern */    /* Get next character in the pattern */
# Line 3793  for (;; ptr++) Line 3894  for (;; ptr++)
3894    /* If we are at the end of a nested substitution, revert to the outer level    /* If we are at the end of a nested substitution, revert to the outer level
3895    string. Nesting only happens one level deep. */    string. Nesting only happens one level deep. */
3896    
3897    if (c == 0 && nestptr != NULL)    if (c == CHAR_NULL && nestptr != NULL)
3898      {      {
3899      ptr = nestptr;      ptr = nestptr;
3900      nestptr = NULL;      nestptr = NULL;
# Line 3868  for (;; ptr++) Line 3969  for (;; ptr++)
3969    
3970    /* If in \Q...\E, check for the end; if not, we have a literal */    /* If in \Q...\E, check for the end; if not, we have a literal */
3971    
3972    if (inescq && c != 0)    if (inescq && c != CHAR_NULL)
3973      {      {
3974      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
3975        {        {
# Line 3916  for (;; ptr++) Line 4017  for (;; ptr++)
4017      if (c == CHAR_NUMBER_SIGN)      if (c == CHAR_NUMBER_SIGN)
4018        {        {
4019        ptr++;        ptr++;
4020        while (*ptr != 0)        while (*ptr != CHAR_NULL)
4021          {          {
4022          if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
4023          ptr++;          ptr++;
# Line 3924  for (;; ptr++) Line 4025  for (;; ptr++)
4025          if (utf) FORWARDCHAR(ptr);          if (utf) FORWARDCHAR(ptr);
4026  #endif  #endif
4027          }          }
4028        if (*ptr != 0) continue;        if (*ptr != CHAR_NULL) continue;
4029    
4030        /* Else fall through to handle end of string */        /* Else fall through to handle end of string */
4031        c = 0;        c = 0;
# Line 3946  for (;; ptr++) Line 4047  for (;; ptr++)
4047      case CHAR_VERTICAL_LINE:       /* or | or ) */      case CHAR_VERTICAL_LINE:       /* or | or ) */
4048      case CHAR_RIGHT_PARENTHESIS:      case CHAR_RIGHT_PARENTHESIS:
4049      *firstcharptr = firstchar;      *firstcharptr = firstchar;
4050        *firstcharflagsptr = firstcharflags;
4051      *reqcharptr = reqchar;      *reqcharptr = reqchar;
4052        *reqcharflagsptr = reqcharflags;
4053      *codeptr = code;      *codeptr = code;
4054      *ptrptr = ptr;      *ptrptr = ptr;
4055      if (lengthptr != NULL)      if (lengthptr != NULL)
# Line 3970  for (;; ptr++) Line 4073  for (;; ptr++)
4073      previous = NULL;      previous = NULL;
4074      if ((options & PCRE_MULTILINE) != 0)      if ((options & PCRE_MULTILINE) != 0)
4075        {        {
4076        if (firstchar == REQ_UNSET) firstchar = REQ_NONE;        if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
4077        *code++ = OP_CIRCM;        *code++ = OP_CIRCM;
4078        }        }
4079      else *code++ = OP_CIRC;      else *code++ = OP_CIRC;
# Line 3985  for (;; ptr++) Line 4088  for (;; ptr++)
4088      repeats. The value of reqchar doesn't change either. */      repeats. The value of reqchar doesn't change either. */
4089    
4090      case CHAR_DOT:      case CHAR_DOT:
4091      if (firstchar == REQ_UNSET) firstchar = REQ_NONE;      if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
4092      zerofirstchar = firstchar;      zerofirstchar = firstchar;
4093        zerofirstcharflags = firstcharflags;
4094      zeroreqchar = reqchar;      zeroreqchar = reqchar;
4095        zeroreqcharflags = reqcharflags;
4096      previous = code;      previous = code;
4097      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
4098      break;      break;
# Line 4061  for (;; ptr++) Line 4166  for (;; ptr++)
4166          (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)          (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
4167        {        {
4168        *code++ = negate_class? OP_ALLANY : OP_FAIL;        *code++ = negate_class? OP_ALLANY : OP_FAIL;
4169        if (firstchar == REQ_UNSET) firstchar = REQ_NONE;        if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
4170        zerofirstchar = firstchar;        zerofirstchar = firstchar;
4171          zerofirstcharflags = firstcharflags;
4172        break;        break;
4173        }        }
4174    
# Line 4097  for (;; ptr++) Line 4203  for (;; ptr++)
4203      means that an initial ] is taken as a data character. At the start of the      means that an initial ] is taken as a data character. At the start of the
4204      loop, c contains the first byte of the character. */      loop, c contains the first byte of the character. */
4205    
4206      if (c != 0) do      if (c != CHAR_NULL) do
4207        {        {
4208        const pcre_uchar *oldptr;        const pcre_uchar *oldptr;
4209    
4210  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
4211        if (utf && HAS_EXTRALEN(c))        if (utf && HAS_EXTRALEN(c))
4212          {                           /* Braces are required because the */          {                           /* Braces are required because the */
4213          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
# Line 4112  for (;; ptr++) Line 4218  for (;; ptr++)
4218        /* In the pre-compile phase, accumulate the length of any extra        /* In the pre-compile phase, accumulate the length of any extra
4219        data and reset the pointer. This is so that very large classes that        data and reset the pointer. This is so that very large classes that
4220        contain a zillion > 255 characters no longer overwrite the work space        contain a zillion > 255 characters no longer overwrite the work space
4221        (which is on the stack). We have to remember that there was XCLASS data,        (which is on the stack). We have to remember that there was XCLASS data,
4222        however. */        however. */
4223    
4224        if (lengthptr != NULL && class_uchardata > class_uchardata_base)        if (lengthptr != NULL && class_uchardata > class_uchardata_base)
# Line 4176  for (;; ptr++) Line 4282  for (;; ptr++)
4282          alpha. This relies on the fact that the class table starts with          alpha. This relies on the fact that the class table starts with
4283          alpha, lower, upper as the first 3 entries. */          alpha, lower, upper as the first 3 entries. */
4284    
4285          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
4286            posix_class = 0;            posix_class = 0;
4287    
4288          /* When PCRE_UCP is set, some of the POSIX classes are converted to          /* When PCRE_UCP is set, some of the POSIX classes are converted to
# Line 4253  for (;; ptr++) Line 4359  for (;; ptr++)
4359    
4360        if (c == CHAR_BACKSLASH)        if (c == CHAR_BACKSLASH)
4361          {          {
4362          escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE);          escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
4363              TRUE);
4364          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
4365            if (escape == 0) c = ec;
         if (escape == 0)  
           c = ec;  
4366          else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */          else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
4367          else if (escape == ESC_N)            /* \N is not supported in a class */          else if (escape == ESC_N)          /* \N is not supported in a class */
4368            {            {
4369            *errorcodeptr = ERR71;            *errorcodeptr = ERR71;
4370            goto FAILED;            goto FAILED;
# Line 4332  for (;; ptr++) Line 4436  for (;; ptr++)
4436              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
4437              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
4438              continue;              continue;
4439    
4440              /* The rest apply in both UCP and non-UCP cases. */              /* The rest apply in both UCP and non-UCP cases. */
4441    
4442              case ESC_h:              case ESC_h:
4443              (void)add_list_to_class(classbits, &class_uchardata, options, cd,              (void)add_list_to_class(classbits, &class_uchardata, options, cd,
4444                PRIV(hspace_list), NOTACHAR);                PRIV(hspace_list), NOTACHAR);
4445              continue;              continue;
4446    
4447              case ESC_H:              case ESC_H:
4448              (void)add_not_list_to_class(classbits, &class_uchardata, options,              (void)add_not_list_to_class(classbits, &class_uchardata, options,
4449                cd, PRIV(hspace_list));                cd, PRIV(hspace_list));
4450              continue;              continue;
4451    
4452              case ESC_v:              case ESC_v:
4453              (void)add_list_to_class(classbits, &class_uchardata, options, cd,              (void)add_list_to_class(classbits, &class_uchardata, options, cd,
4454                PRIV(vspace_list), NOTACHAR);                PRIV(vspace_list), NOTACHAR);
4455              continue;              continue;
4456    
4457              case ESC_V:              case ESC_V:
4458              (void)add_not_list_to_class(classbits, &class_uchardata, options,              (void)add_not_list_to_class(classbits, &class_uchardata, options,
4459                cd, PRIV(vspace_list));                cd, PRIV(vspace_list));
4460              continue;              continue;
4461    
4462  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 4360  for (;; ptr++) Line 4464  for (;; ptr++)
4464              case ESC_P:              case ESC_P:
4465                {                {
4466                BOOL negated;                BOOL negated;
4467                int pdata;                unsigned int ptype = 0, pdata = 0;
4468                int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);                if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
4469                if (ptype < 0) goto FAILED;                  goto FAILED;
4470                *class_uchardata++ = ((escape == ESC_p) != negated)?                *class_uchardata++ = ((escape == ESC_p) != negated)?
4471                  XCL_PROP : XCL_NOTPROP;                  XCL_PROP : XCL_NOTPROP;
4472                *class_uchardata++ = ptype;                *class_uchardata++ = ptype;
# Line 4390  for (;; ptr++) Line 4494  for (;; ptr++)
4494    
4495          /* Fall through if the escape just defined a single character (c >= 0).          /* Fall through if the escape just defined a single character (c >= 0).
4496          This may be greater than 256. */          This may be greater than 256. */
4497    
4498          escape = 0;          escape = 0;
4499    
4500          }   /* End of backslash handling */          }   /* End of backslash handling */
# Line 4416  for (;; ptr++) Line 4520  for (;; ptr++)
4520    
4521        if (!inescq && ptr[1] == CHAR_MINUS)        if (!inescq && ptr[1] == CHAR_MINUS)
4522          {          {
4523          int d;          pcre_uint32 d;
4524          ptr += 2;          ptr += 2;
4525          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
4526    
# Line 4431  for (;; ptr++) Line 4535  for (;; ptr++)
4535            inescq = TRUE;            inescq = TRUE;
4536            break;            break;
4537            }            }
4538    
4539          /* Minus (hyphen) at the end of a class is treated as a literal, so put          /* Minus (hyphen) at the end of a class is treated as a literal, so put
4540          back the pointer and jump to handle the character that preceded it. */          back the pointer and jump to handle the character that preceded it. */
4541    
4542          if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))          if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
4543            {            {
4544            ptr = oldptr;            ptr = oldptr;
4545            goto CLASS_SINGLE_CHARACTER;            goto CLASS_SINGLE_CHARACTER;
4546            }            }
4547    
4548          /* Otherwise, we have a potential range; pick up the next character */          /* Otherwise, we have a potential range; pick up the next character */
4549    
4550  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 4487  for (;; ptr++) Line 4591  for (;; ptr++)
4591          /* We have found a character range, so single character optimizations          /* We have found a character range, so single character optimizations
4592          cannot be done anymore. Any value greater than 1 indicates that there          cannot be done anymore. Any value greater than 1 indicates that there
4593          is more than one character. */          is more than one character. */
4594    
4595          class_one_char = 2;          class_one_char = 2;
4596    
4597          /* Remember an explicit \r or \n, and add the range to the class. */          /* Remember an explicit \r or \n, and add the range to the class. */
4598    
4599          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
4600    
4601          class_has_8bitchar +=          class_has_8bitchar +=
4602            add_to_class(classbits, &class_uchardata, options, cd, c, d);            add_to_class(classbits, &class_uchardata, options, cd, c, d);
4603    
4604          continue;   /* Go get the next char in the class */          continue;   /* Go get the next char in the class */
4605          }          }
4606    
4607        /* Handle a single character - we can get here for a normal non-escape        /* Handle a single character - we can get here for a normal non-escape
4608        char, or after \ that introduces a single character or for an apparent        char, or after \ that introduces a single character or for an apparent
4609        range that isn't. Only the value 1 matters for class_one_char, so don't        range that isn't. Only the value 1 matters for class_one_char, so don't
4610        increase it if it is already 2 or more ... just in case there's a class        increase it if it is already 2 or more ... just in case there's a class
4611        with a zillion characters in it. */        with a zillion characters in it. */
4612    
4613        CLASS_SINGLE_CHARACTER:        CLASS_SINGLE_CHARACTER:
# Line 4522  for (;; ptr++) Line 4626  for (;; ptr++)
4626          {          {
4627          ptr++;          ptr++;
4628          zeroreqchar = reqchar;          zeroreqchar = reqchar;
4629            zeroreqcharflags = reqcharflags;
4630    
4631          if (negate_class)          if (negate_class)
4632            {            {
4633  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
             // FIXMEchpe pcreuint32?  
4634            int d;            int d;
4635  #endif  #endif
4636            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;            if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
4637            zerofirstchar = firstchar;            zerofirstchar = firstchar;
4638              zerofirstcharflags = firstcharflags;
4639    
4640            /* For caseless UTF-8 mode when UCP support is available, check            /* For caseless UTF-8 mode when UCP support is available, check
4641            whether this character has more than one other case. If so, generate            whether this character has more than one other case. If so, generate
4642            a special OP_NOTPROP item instead of OP_NOTI. */            a special OP_NOTPROP item instead of OP_NOTI. */
4643    
4644  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4645            if (utf && (options & PCRE_CASELESS) != 0 &&            if (utf && (options & PCRE_CASELESS) != 0 &&
4646                (d = UCD_CASESET(c)) != 0)                (d = UCD_CASESET(c)) != 0)
4647              {              {
4648              *code++ = OP_NOTPROP;              *code++ = OP_NOTPROP;
4649              *code++ = PT_CLIST;              *code++ = PT_CLIST;
4650              *code++ = d;              *code++ = d;
4651              }              }
4652            else            else
4653  #endif  #endif
4654            /* Char has only one other case, or UCP not available */            /* Char has only one other case, or UCP not available */
4655    
# Line 4557  for (;; ptr++) Line 4662  for (;; ptr++)
4662  #endif  #endif
4663                *code++ = c;                *code++ = c;
4664              }              }
4665    
4666            /* We are finished with this character class */            /* We are finished with this character class */
4667    
4668            goto END_CLASS;            goto END_CLASS;
4669            }            }
4670    
# Line 4577  for (;; ptr++) Line 4682  for (;; ptr++)
4682            }            }
4683          goto ONE_CHAR;          goto ONE_CHAR;
4684          }       /* End of 1-char optimization */          }       /* End of 1-char optimization */
4685    
4686        /* There is more than one character in the class, or an XCLASS item        /* There is more than one character in the class, or an XCLASS item
4687        has been generated. Add this character to the class. */        has been generated. Add this character to the class. */
4688    
4689        class_has_8bitchar +=        class_has_8bitchar +=
4690          add_to_class(classbits, &class_uchardata, options, cd, c, c);          add_to_class(classbits, &class_uchardata, options, cd, c, c);
4691        }        }
4692    
# Line 4589  for (;; ptr++) Line 4694  for (;; ptr++)
4694      If we are at the end of an internal nested string, revert to the outer      If we are at the end of an internal nested string, revert to the outer
4695      string. */      string. */
4696    
4697      while (((c = *(++ptr)) != 0 ||      while (((c = *(++ptr)) != CHAR_NULL ||
4698             (nestptr != NULL &&             (nestptr != NULL &&
4699               (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != 0)) &&               (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != CHAR_NULL)) &&
4700             (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));             (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
4701    
4702      /* Check for missing terminating ']' */      /* Check for missing terminating ']' */
4703    
4704      if (c == 0)      if (c == CHAR_NULL)
4705        {        {
4706        *errorcodeptr = ERR6;        *errorcodeptr = ERR6;
4707        goto FAILED;        goto FAILED;
4708        }        }
4709    
4710      /* We will need an XCLASS if data has been placed in class_uchardata. In      /* We will need an XCLASS if data has been placed in class_uchardata. In
4711      the second phase this is a sufficient test. However, in the pre-compile      the second phase this is a sufficient test. However, in the pre-compile
4712      phase, class_uchardata gets emptied to prevent workspace overflow, so      phase, class_uchardata gets emptied to prevent workspace overflow, so
4713      only if the very last character in the class needs XCLASS will it contain      only if the very last character in the class needs XCLASS will it contain
4714      anything at this point. For this reason, xclass gets set TRUE above when      anything at this point. For this reason, xclass gets set TRUE above when
4715      class_uchardata is emptied, and that's why this code is the way it is here      class_uchardata is emptied, and that's why this code is the way it is here
4716      instead of just doing a test on class_uchardata below. */      instead of just doing a test on class_uchardata below. */
4717    
4718  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4719      if (class_uchardata > class_uchardata_base) xclass = TRUE;      if (class_uchardata > class_uchardata_base) xclass = TRUE;
4720  #endif  #endif
# Line 4618  for (;; ptr++) Line 4723  for (;; ptr++)
4723      setting, whatever the repeat count. Any reqchar setting must remain      setting, whatever the repeat count. Any reqchar setting must remain
4724      unchanged after any kind of repeat. */      unchanged after any kind of repeat. */
4725    
4726      if (firstchar == REQ_UNSET) firstchar = REQ_NONE;      if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
4727      zerofirstchar = firstchar;      zerofirstchar = firstchar;
4728        zerofirstcharflags = firstcharflags;
4729      zeroreqchar = reqchar;      zeroreqchar = reqchar;
4730        zeroreqcharflags = reqcharflags;
4731    
4732      /* If there are characters with values > 255, we have to compile an      /* If there are characters with values > 255, we have to compile an
4733      extended class, with its own opcode, unless there was a negated special      extended class, with its own opcode, unless there was a negated special
# Line 4676  for (;; ptr++) Line 4783  for (;; ptr++)
4783        memcpy(code, classbits, 32);        memcpy(code, classbits, 32);
4784        }        }
4785      code += 32 / sizeof(pcre_uchar);      code += 32 / sizeof(pcre_uchar);
4786    
4787      END_CLASS:      END_CLASS:
4788      break;      break;
4789    
# Line 4715  for (;; ptr++) Line 4822  for (;; ptr++)
4822      if (repeat_min == 0)      if (repeat_min == 0)
4823        {        {
4824        firstchar = zerofirstchar;    /* Adjust for zero repeat */        firstchar = zerofirstchar;    /* Adjust for zero repeat */
4825          firstcharflags = zerofirstcharflags;
4826        reqchar = zeroreqchar;        /* Ditto */        reqchar = zeroreqchar;        /* Ditto */
4827          reqcharflags = zeroreqcharflags;
4828        }        }
4829    
4830      /* Remember whether this is a variable length repeat */      /* Remember whether this is a variable length repeat */
# Line 4818  for (;; ptr++) Line 4927  for (;; ptr++)
4927          {          {
4928          c = code[-1];          c = code[-1];
4929          if (*previous <= OP_CHARI && repeat_min > 1)          if (*previous <= OP_CHARI && repeat_min > 1)
4930            reqchar = c | req_caseopt | cd->req_varyopt;            {
4931              reqchar = c;
4932              reqcharflags = req_caseopt | cd->req_varyopt;
4933              }
4934          }          }
4935    
4936        /* If the repetition is unlimited, it pays to see if the next thing on        /* If the repetition is unlimited, it pays to see if the next thing on
# Line 4875  for (;; ptr++) Line 4987  for (;; ptr++)
4987    
4988        if (repeat_max == 0) goto END_REPEAT;        if (repeat_max == 0) goto END_REPEAT;
4989    
       /*--------------------------------------------------------------------*/  
       /* This code is obsolete from release 8.00; the restriction was finally  
       removed: */  
   
       /* All real repeats make it impossible to handle partial matching (maybe  
       one day we will be able to remove this restriction). */  
   
       /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */  
       /*--------------------------------------------------------------------*/  
   
4990        /* Combine the op_type with the repeat_type */        /* Combine the op_type with the repeat_type */
4991    
4992        repeat_type += op_type;        repeat_type += op_type;
# Line 5031  for (;; ptr++) Line 5133  for (;; ptr++)
5133          goto END_REPEAT;          goto END_REPEAT;
5134          }          }
5135    
       /*--------------------------------------------------------------------*/  
       /* This code is obsolete from release 8.00; the restriction was finally  
       removed: */  
   
       /* All real repeats make it impossible to handle partial matching (maybe  
       one day we will be able to remove this restriction). */  
   
       /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */  
       /*--------------------------------------------------------------------*/  
   
5136        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
5137          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
5138        else if (repeat_min == 1 && repeat_max == -1)        else if (repeat_min == 1 && repeat_max == -1)
# Line 5200  for (;; ptr++) Line 5292  for (;; ptr++)
5292    
5293            else            else
5294              {              {
5295              if (groupsetfirstchar && reqchar < 0) reqchar = firstchar;              if (groupsetfirstchar && reqcharflags < 0)
5296                  {
5297                  reqchar = firstchar;
5298                  reqcharflags = firstcharflags;
5299                  }
5300    
5301              for (i = 1; i < repeat_min; i++)              for (i = 1; i < repeat_min; i++)
5302                {                {
# Line 5379  for (;; ptr++) Line 5475  for (;; ptr++)
5475              pcre_uchar *scode = bracode;              pcre_uchar *scode = bracode;
5476              do              do
5477                {                {
5478                if (could_be_empty_branch(scode, ketcode, utf, cd))                if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
5479                  {                  {
5480                  *bracode += OP_SBRA - OP_BRA;                  *bracode += OP_SBRA - OP_BRA;
5481                  break;                  break;
# Line 5479  for (;; ptr++) Line 5575  for (;; ptr++)
5575        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
5576          {          {
5577          tempcode += PRIV(OP_lengths)[*tempcode];          tempcode += PRIV(OP_lengths)[*tempcode];
5578  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
5579          if (utf && HAS_EXTRALEN(tempcode[-1]))          if (utf && HAS_EXTRALEN(tempcode[-1]))
5580            tempcode += GET_EXTRALEN(tempcode[-1]);            tempcode += GET_EXTRALEN(tempcode[-1]);
5581  #endif  #endif
# Line 5575  for (;; ptr++) Line 5671  for (;; ptr++)
5671        if (*ptr == CHAR_COLON)        if (*ptr == CHAR_COLON)
5672          {          {
5673          arg = ++ptr;          arg = ++ptr;
5674          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;          while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
5675          arglen = (int)(ptr - arg);          arglen = (int)(ptr - arg);
5676          if ((unsigned int)arglen > MAX_MARK)          if ((unsigned int)arglen > MAX_MARK)
5677            {            {
# Line 5620  for (;; ptr++) Line 5716  for (;; ptr++)
5716                (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;                (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
5717    
5718              /* Do not set firstchar after *ACCEPT */              /* Do not set firstchar after *ACCEPT */
5719              if (firstchar == REQ_UNSET) firstchar = REQ_NONE;              if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
5720              }              }
5721    
5722            /* Handle other cases with/without an argument */            /* Handle other cases with/without an argument */
# Line 5689  for (;; ptr++) Line 5785  for (;; ptr++)
5785          {          {
5786          case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */          case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */
5787          ptr++;          ptr++;
5788          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;          while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
5789          if (*ptr == 0)          if (*ptr == CHAR_NULL)
5790            {            {
5791            *errorcodeptr = ERR18;            *errorcodeptr = ERR18;
5792            goto FAILED;            goto FAILED;
# Line 5713  for (;; ptr++) Line 5809  for (;; ptr++)
5809          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5810          case CHAR_LEFT_PARENTHESIS:          case CHAR_LEFT_PARENTHESIS:
5811          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
5812            tempptr = ptr;
5813    
5814          /* A condition can be an assertion, a number (referring to a numbered          /* A condition can be an assertion, a number (referring to a numbered
5815          group), a name (referring to a named group), or 'R', referring to          group), a name (referring to a named group), or 'R', referring to
# Line 5725  for (;; ptr++) Line 5822  for (;; ptr++)
5822          be the recursive thing or the name 'R' (and similarly for 'R' followed          be the recursive thing or the name 'R' (and similarly for 'R' followed
5823          by digits), and (b) a number could be a name that consists of digits.          by digits), and (b) a number could be a name that consists of digits.
5824          In both cases, we look for a name first; if not found, we try the other          In both cases, we look for a name first; if not found, we try the other
5825          cases. */          cases.
5826    
5827            For compatibility with auto-callouts, we allow a callout to be
5828            specified before a condition that is an assertion. First, check for the
5829            syntax of a callout; if found, adjust the temporary pointer that is
5830            used to check for an assertion condition. That's all that is needed! */
5831    
5832            if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
5833              {
5834              for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
5835              if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
5836                tempptr += i + 1;
5837              }
5838    
5839          /* For conditions that are assertions, check the syntax, and then exit          /* For conditions that are assertions, check the syntax, and then exit
5840          the switch. This will take control down to where bracketed groups,          the switch. This will take control down to where bracketed groups,
5841          including assertions, are processed. */          including assertions, are processed. */
5842    
5843          if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||          if (tempptr[1] == CHAR_QUESTION_MARK &&
5844              ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))                (tempptr[2] == CHAR_EQUALS_SIGN ||
5845                   tempptr[2] == CHAR_EXCLAMATION_MARK ||
5846                   tempptr[2] == CHAR_LESS_THAN_SIGN))
5847            break;            break;
5848    
5849          /* Most other conditions use OP_CREF (a couple change to OP_RREF          /* Most other conditions use OP_CREF (a couple change to OP_RREF
# Line 5766  for (;; ptr++) Line 5877  for (;; ptr++)
5877            }            }
5878          else          else
5879            {            {
5880            terminator = 0;            terminator = CHAR_NULL;
5881            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
5882            }            }
5883    
# Line 5786  for (;; ptr++) Line 5897  for (;; ptr++)
5897          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)
5898            {            {
5899            if (recno >= 0)            if (recno >= 0)
5900              recno = (IS_DIGIT(*ptr))? recno * 10 + *ptr - CHAR_0 : -1;              recno = (IS_DIGIT(*ptr))? recno * 10 + (int)(*ptr - CHAR_0) : -1;
5901            ptr++;            ptr++;
5902            }            }
5903          namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
5904    
5905          if ((terminator > 0 && *ptr++ != terminator) ||          if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||
5906              *ptr++ != CHAR_RIGHT_PARENTHESIS)              *ptr++ != CHAR_RIGHT_PARENTHESIS)
5907            {            {
5908            ptr--;      /* Error offset */            ptr--;      /* Error offset */
# Line 5856  for (;; ptr++) Line 5967  for (;; ptr++)
5967            code[1+LINK_SIZE]++;            code[1+LINK_SIZE]++;
5968            }            }
5969    
5970          /* If terminator == 0 it means that the name followed directly after          /* If terminator == CHAR_NULL it means that the name followed directly
5971          the opening parenthesis [e.g. (?(abc)...] and in this case there are          after the opening parenthesis [e.g. (?(abc)...] and in this case there
5972          some further alternatives to try. For the cases where terminator != 0          are some further alternatives to try. For the cases where terminator !=
5973          [things like (?(<name>... or (?('name')... or (?(R&name)... ] we have          0 [things like (?(<name>... or (?('name')... or (?(R&name)... ] we have
5974          now checked all the possibilities, so give an error. */          now checked all the possibilities, so give an error. */
5975    
5976          else if (terminator != 0)          else if (terminator != CHAR_NULL)
5977            {            {
5978            *errorcodeptr = ERR15;            *errorcodeptr = ERR15;
5979            goto FAILED;            goto FAILED;
# Line 6031  for (;; ptr++) Line 6142  for (;; ptr++)
6142    
6143            if (lengthptr != NULL)            if (lengthptr != NULL)
6144              {              {
6145              if (*ptr != terminator)              if (*ptr != (pcre_uchar)terminator)
6146                {                {
6147                *errorcodeptr = ERR42;                *errorcodeptr = ERR42;
6148                goto FAILED;                goto FAILED;
# Line 6173  for (;; ptr++) Line 6284  for (;; ptr++)
6284              *errorcodeptr = ERR62;              *errorcodeptr = ERR62;
6285              goto FAILED;              goto FAILED;
6286              }              }
6287            if (*ptr != terminator)            if (*ptr != (pcre_uchar)terminator)
6288              {              {
6289              *errorcodeptr = ERR42;              *errorcodeptr = ERR42;
6290              goto FAILED;              goto FAILED;
# Line 6279  for (;; ptr++) Line 6390  for (;; ptr++)
6390            while(IS_DIGIT(*ptr))            while(IS_DIGIT(*ptr))
6391              recno = recno * 10 + *ptr++ - CHAR_0;              recno = recno * 10 + *ptr++ - CHAR_0;
6392    
6393            if (*ptr != terminator)            if (*ptr != (pcre_uchar)terminator)
6394              {              {
6395              *errorcodeptr = ERR29;              *errorcodeptr = ERR29;
6396              goto FAILED;              goto FAILED;
# Line 6383  for (;; ptr++) Line 6494  for (;; ptr++)
6494    
6495          /* Can't determine a first byte now */          /* Can't determine a first byte now */
6496    
6497          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
6498          continue;          continue;
6499    
6500    
# Line 6517  for (;; ptr++) Line 6628  for (;; ptr++)
6628           cond_depth +           cond_depth +
6629             ((bravalue == OP_COND)?1:0),   /* Depth of condition subpatterns */             ((bravalue == OP_COND)?1:0),   /* Depth of condition subpatterns */
6630           &subfirstchar,                   /* For possible first char */           &subfirstchar,                   /* For possible first char */
6631             &subfirstcharflags,
6632           &subreqchar,                     /* For possible last char */           &subreqchar,                     /* For possible last char */
6633             &subreqcharflags,
6634           bcptr,                           /* Current branch chain */           bcptr,                           /* Current branch chain */
6635           cd,                              /* Tables block */           cd,                              /* Tables block */
6636           (lengthptr == NULL)? NULL :      /* Actual compile phase */           (lengthptr == NULL)? NULL :      /* Actual compile phase */
# Line 6578  for (;; ptr++) Line 6691  for (;; ptr++)
6691            *errorcodeptr = ERR27;            *errorcodeptr = ERR27;
6692            goto FAILED;            goto FAILED;
6693            }            }
6694          if (condcount == 1) subfirstchar = subreqchar = REQ_NONE;          if (condcount == 1) subfirstcharflags = subreqcharflags = REQ_NONE;
6695          }          }
6696        }        }
6697    
# Line 6627  for (;; ptr++) Line 6740  for (;; ptr++)
6740      back off. */      back off. */
6741    
6742      zeroreqchar = reqchar;      zeroreqchar = reqchar;
6743        zeroreqcharflags = reqcharflags;
6744      zerofirstchar = firstchar;      zerofirstchar = firstchar;
6745        zerofirstcharflags = firstcharflags;
6746      groupsetfirstchar = FALSE;      groupsetfirstchar = FALSE;
6747    
6748      if (bravalue >= OP_ONCE)      if (bravalue >= OP_ONCE)
# Line 6638  for (;; ptr++) Line 6753  for (;; ptr++)
6753        no firstchar, set "none" for the whole branch. In both cases, a zero        no firstchar, set "none" for the whole branch. In both cases, a zero
6754        repeat forces firstchar to "none". */        repeat forces firstchar to "none". */
6755    
6756        if (firstchar == REQ_UNSET)        if (firstcharflags == REQ_UNSET)
6757          {          {
6758          if (subfirstchar >= 0)          if (subfirstcharflags >= 0)
6759            {            {
6760            firstchar = subfirstchar;            firstchar = subfirstchar;
6761              firstcharflags = subfirstcharflags;
6762            groupsetfirstchar = TRUE;            groupsetfirstchar = TRUE;
6763            }            }
6764          else firstchar = REQ_NONE;          else firstcharflags = REQ_NONE;
6765          zerofirstchar = REQ_NONE;          zerofirstcharflags = REQ_NONE;
6766          }          }
6767    
6768        /* If firstchar was previously set, convert the subpattern's firstchar        /* If firstchar was previously set, convert the subpattern's firstchar
6769        into reqchar if there wasn't one, using the vary flag that was in        into reqchar if there wasn't one, using the vary flag that was in
6770        existence beforehand. */        existence beforehand. */
6771    
6772        else if (subfirstchar >= 0 && subreqchar < 0)        else if (subfirstcharflags >= 0 && subreqcharflags < 0)
6773          subreqchar = subfirstchar | tempreqvary;          {
6774            subreqchar = subfirstchar;
6775            subreqcharflags = subfirstcharflags | tempreqvary;
6776            }
6777    
6778        /* If the subpattern set a required byte (or set a first byte that isn't        /* If the subpattern set a required byte (or set a first byte that isn't
6779        really the first byte - see above), set it. */        really the first byte - see above), set it. */
6780    
6781        if (subreqchar >= 0) reqchar = subreqchar;        if (subreqcharflags >= 0)
6782            {
6783            reqchar = subreqchar;
6784            reqcharflags = subreqcharflags;
6785            }
6786        }        }
6787    
6788      /* For a forward assertion, we take the reqchar, if set. This can be      /* For a forward assertion, we take the reqchar, if set. This can be
# Line 6670  for (;; ptr++) Line 6793  for (;; ptr++)
6793      of a firstchar. This is overcome by a scan at the end if there's no      of a firstchar. This is overcome by a scan at the end if there's no
6794      firstchar, looking for an asserted first char. */      firstchar, looking for an asserted first char. */
6795    
6796      else if (bravalue == OP_ASSERT && subreqchar >= 0) reqchar = subreqchar;      else if (bravalue == OP_ASSERT && subreqcharflags >= 0)
6797          {
6798          reqchar = subreqchar;
6799          reqcharflags = subreqcharflags;
6800          }
6801      break;     /* End of processing '(' */      break;     /* End of processing '(' */
6802    
6803    
# Line 6686  for (;; ptr++) Line 6813  for (;; ptr++)
6813      case CHAR_BACKSLASH:      case CHAR_BACKSLASH:
6814      tempptr = ptr;      tempptr = ptr;
6815      escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);      escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);
   
6816      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
6817    
6818      if (escape == 0)      if (escape == 0)                  /* The escape coded a single character */
6819        c = ec;        c = ec;
6820      else      else
6821        {        {
# Line 6706  for (;; ptr++) Line 6832  for (;; ptr++)
6832        /* For metasequences that actually match a character, we disable the        /* For metasequences that actually match a character, we disable the
6833        setting of a first character if it hasn't already been set. */        setting of a first character if it hasn't already been set. */
6834    
6835        if (firstchar == REQ_UNSET && escape > ESC_b && escape < ESC_Z)        if (firstcharflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z)
6836          firstchar = REQ_NONE;          firstcharflags = REQ_NONE;
6837    
6838        /* Set values to reset to if this is followed by a zero repeat. */        /* Set values to reset to if this is followed by a zero repeat. */
6839    
6840        zerofirstchar = firstchar;        zerofirstchar = firstchar;
6841          zerofirstcharflags = firstcharflags;
6842        zeroreqchar = reqchar;        zeroreqchar = reqchar;
6843          zeroreqcharflags = reqcharflags;
6844    
6845        /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'        /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
6846        is a subroutine call by number (Oniguruma syntax). In fact, the value        is a subroutine call by number (Oniguruma syntax). In fact, the value
# Line 6741  for (;; ptr++) Line 6869  for (;; ptr++)
6869          if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)          if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
6870            {            {
6871            BOOL is_a_number = TRUE;            BOOL is_a_number = TRUE;
6872            for (p = ptr + 1; *p != 0 && *p != terminator; p++)            for (p = ptr + 1; *p != CHAR_NULL && *p != (pcre_uchar)terminator; p++)
6873              {              {
6874              if (!MAX_255(*p)) { is_a_number = FALSE; break; }              if (!MAX_255(*p)) { is_a_number = FALSE; break; }
6875              if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE;              if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE;
6876              if ((cd->ctypes[*p] & ctype_word) == 0) break;              if ((cd->ctypes[*p] & ctype_word) == 0) break;
6877              }              }
6878            if (*p != terminator)            if (*p != (pcre_uchar)terminator)
6879              {              {
6880              *errorcodeptr = ERR57;              *errorcodeptr = ERR57;
6881              break;              break;
# Line 6765  for (;; ptr++) Line 6893  for (;; ptr++)
6893    
6894          p = ptr + 2;          p = ptr + 2;
6895          while (IS_DIGIT(*p)) p++;          while (IS_DIGIT(*p)) p++;
6896          if (*p != terminator)          if (*p != (pcre_uchar)terminator)
6897            {            {
6898            *errorcodeptr = ERR57;            *errorcodeptr = ERR57;
6899            break;            break;
# Line 6802  for (;; ptr++) Line 6930  for (;; ptr++)
6930          recno = -escape;          recno = -escape;
6931    
6932          HANDLE_REFERENCE:    /* Come here from named backref handling */          HANDLE_REFERENCE:    /* Come here from named backref handling */
6933          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
6934          previous = code;          previous = code;
6935          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
6936          PUT2INC(code, 0, recno);          PUT2INC(code, 0, recno);
# Line 6829  for (;; ptr++) Line 6957  for (;; ptr++)
6957        else if (escape == ESC_P || escape == ESC_p)        else if (escape == ESC_P || escape == ESC_p)
6958          {          {
6959          BOOL negated;          BOOL negated;
6960          int pdata;          unsigned int ptype = 0, pdata = 0;
6961          int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);          if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
6962          if (ptype < 0) goto FAILED;            goto FAILED;
6963          previous = code;          previous = code;
6964          *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;          *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
6965          *code++ = ptype;          *code++ = ptype;
# Line 6853  for (;; ptr++) Line 6981  for (;; ptr++)
6981        can obtain the OP value by negating the escape value in the default        can obtain the OP value by negating the escape value in the default
6982        situation when PCRE_UCP is not set. When it *is* set, we substitute        situation when PCRE_UCP is not set. When it *is* set, we substitute
6983        Unicode property tests. Note that \b and \B do a one-character        Unicode property tests. Note that \b and \B do a one-character
6984        lookbehind. */        lookbehind, and \A also behaves as if it does. */
6985    
6986        else        else
6987          {          {
6988          if ((escape == ESC_b || escape == ESC_B) && cd->max_lookbehind == 0)          if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
6989                 cd->max_lookbehind == 0)
6990            cd->max_lookbehind = 1;            cd->max_lookbehind = 1;
6991  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6992          if (escape >= ESC_DU && escape <= ESC_wu)          if (escape >= ESC_DU && escape <= ESC_wu)
# Line 6905  for (;; ptr++) Line 7034  for (;; ptr++)
7034      mclength = 1;      mclength = 1;
7035      mcbuffer[0] = c;      mcbuffer[0] = c;
7036    
7037  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
7038      if (utf && HAS_EXTRALEN(c))      if (utf && HAS_EXTRALEN(c))
7039        ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));        ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
7040  #endif  #endif
# Line 6915  for (;; ptr++) Line 7044  for (;; ptr++)
7044    
7045      ONE_CHAR:      ONE_CHAR:
7046      previous = code;      previous = code;
7047    
7048      /* For caseless UTF-8 mode when UCP support is available, check whether      /* For caseless UTF-8 mode when UCP support is available, check whether
7049      this character has more than one other case. If so, generate a special      this character has more than one other case. If so, generate a special
7050      OP_PROP item instead of OP_CHARI. */      OP_PROP item instead of OP_CHARI. */
7051    
7052  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
7053      if (utf && (options & PCRE_CASELESS) != 0)      if (utf && (options & PCRE_CASELESS) != 0)
7054        {        {
# Line 6929  for (;; ptr++) Line 7058  for (;; ptr++)
7058          *code++ = OP_PROP;          *code++ = OP_PROP;
7059          *code++ = PT_CLIST;          *code++ = PT_CLIST;
7060          *code++ = c;          *code++ = c;
7061          if (firstchar == REQ_UNSET) firstchar = zerofirstchar = REQ_NONE;          if (firstcharflags == REQ_UNSET) firstcharflags = zerofirstcharflags = REQ_NONE;
7062          break;          break;
7063          }          }
7064        }        }
7065  #endif  #endif
7066    
7067      /* Caseful matches, or not one of the multicase characters. */      /* Caseful matches, or not one of the multicase characters. */
7068    
7069      *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR;      *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR;
7070      for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];      for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
7071    
# Line 6950  for (;; ptr++) Line 7079  for (;; ptr++)
7079      Otherwise, leave the firstchar value alone, and don't change it on a zero      Otherwise, leave the firstchar value alone, and don't change it on a zero
7080      repeat. */      repeat. */
7081    
7082      if (firstchar == REQ_UNSET)      if (firstcharflags == REQ_UNSET)
7083        {        {
7084        zerofirstchar = REQ_NONE;        zerofirstcharflags = REQ_NONE;
7085        zeroreqchar = reqchar;        zeroreqchar = reqchar;
7086          zeroreqcharflags = reqcharflags;
7087    
7088        /* If the character is more than one byte long, we can set firstchar        /* If the character is more than one byte long, we can set firstchar
7089        only if it is not to be matched caselessly. */        only if it is not to be matched caselessly. */
# Line 6961  for (;; ptr++) Line 7091  for (;; ptr++)
7091        if (mclength == 1 || req_caseopt == 0)        if (mclength == 1 || req_caseopt == 0)
7092          {          {
7093          firstchar = mcbuffer[0] | req_caseopt;          firstchar = mcbuffer[0] | req_caseopt;
7094          if (mclength != 1) reqchar = code[-1] | cd->req_varyopt;          firstchar = mcbuffer[0];
7095            firstcharflags = req_caseopt;
7096    
7097            if (mclength != 1)
7098              {
7099              reqchar = code[-1];
7100              reqcharflags = cd->req_varyopt;
7101              }
7102          }          }
7103        else firstchar = reqchar = REQ_NONE;        else firstcharflags = reqcharflags = REQ_NONE;
7104        }        }
7105    
7106      /* firstchar was previously set; we can set reqchar only if the length is      /* firstchar was previously set; we can set reqchar only if the length is
# Line 6972  for (;; ptr++) Line 7109  for (;; ptr++)
7109      else      else
7110        {        {
7111        zerofirstchar = firstchar;        zerofirstchar = firstchar;
7112          zerofirstcharflags = firstcharflags;
7113        zeroreqchar = reqchar;        zeroreqchar = reqchar;
7114          zeroreqcharflags = reqcharflags;
7115        if (mclength == 1 || req_caseopt == 0)        if (mclength == 1 || req_caseopt == 0)
7116          reqchar = code[-1] | req_caseopt | cd->req_varyopt;          {
7117            reqchar = code[-1];
7118            reqcharflags = req_caseopt | cd->req_varyopt;
7119            }
7120        }        }
7121    
7122      break;            /* End of literal character handling */      break;            /* End of literal character handling */
# Line 6993  return FALSE; Line 7135  return FALSE;
7135    
7136    
7137    
   
7138  /*************************************************  /*************************************************
7139  *     Compile sequence of alternatives           *  *     Compile sequence of alternatives           *
7140  *************************************************/  *************************************************/
# Line 7014  Arguments: Line 7155  Arguments:
7155    reset_bracount TRUE to reset the count for each branch    reset_bracount TRUE to reset the count for each branch
7156    skipbytes      skip this many bytes at start (for brackets and OP_COND)    skipbytes      skip this many bytes at start (for brackets and OP_COND)
7157    cond_depth     depth of nesting for conditional subpatterns    cond_depth     depth of nesting for conditional subpatterns
7158    firstcharptr   place to put the first required character, or a negative number    firstcharptr    place to put the first required character
7159    reqcharptr     place to put the last required character, or a negative number    firstcharflagsptr place to put the first character flags, or a negative number
7160      reqcharptr     place to put the last required character
7161      reqcharflagsptr place to put the last required character flags, or a negative number
7162    bcptr          pointer to the chain of currently open branches    bcptr          pointer to the chain of currently open branches
7163    cd             points to the data block with tables pointers etc.    cd             points to the data block with tables pointers etc.
7164    lengthptr      NULL during the real compile phase    lengthptr      NULL during the real compile phase
# Line 7027  Returns:         TRUE on success Line 7170  Returns:         TRUE on success
7170  static BOOL  static BOOL
7171  compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,  compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
7172    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
7173    int cond_depth, pcre_int32 *firstcharptr, pcre_int32 *reqcharptr,    int cond_depth,
7174      pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr,
7175      pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr,
7176    branch_chain *bcptr, compile_data *cd, int *lengthptr)    branch_chain *bcptr, compile_data *cd, int *lengthptr)
7177  {  {
7178  const pcre_uchar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
# Line 7037  pcre_uchar *start_bracket = code; Line 7182  pcre_uchar *start_bracket = code;
7182  pcre_uchar *reverse_count = NULL;  pcre_uchar *reverse_count = NULL;
7183  open_capitem capitem;  open_capitem capitem;
7184  int capnumber = 0;  int capnumber = 0;
7185  pcre_int32 firstchar, reqchar;  pcre_uint32 firstchar, reqchar;
7186  pcre_int32 branchfirstchar, branchreqchar;  pcre_int32 firstcharflags, reqcharflags;
7187    pcre_uint32 branchfirstchar, branchreqchar;
7188    pcre_int32 branchfirstcharflags, branchreqcharflags;
7189  int length;  int length;
7190  int orig_bracount;  unsigned int orig_bracount;
7191  int max_bracount;  unsigned int max_bracount;
7192  branch_chain bc;  branch_chain bc;
7193    
7194  bc.outer = bcptr;  bc.outer = bcptr;
7195  bc.current_branch = code;  bc.current_branch = code;
7196    
7197  firstchar = reqchar = REQ_UNSET;  firstchar = reqchar = 0;
7198    firstcharflags = reqcharflags = REQ_UNSET;
7199    
7200  /* Accumulate the length for use in the pre-compile phase. Start with the  /* Accumulate the length for use in the pre-compile phase. Start with the
7201  length of the BRA and KET and any extra bytes that are required at the  length of the BRA and KET and any extra bytes that are required at the
# Line 7107  for (;;) Line 7255  for (;;)
7255    into the length. */    into the length. */
7256    
7257    if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,    if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,
7258          &branchreqchar, &bc, cond_depth, cd,          &branchfirstcharflags, &branchreqchar, &branchreqcharflags, &bc,
7259          (lengthptr == NULL)? NULL : &length))          cond_depth, cd, (lengthptr == NULL)? NULL : &length))
7260      {      {
7261      *ptrptr = ptr;      *ptrptr = ptr;
7262      return FALSE;      return FALSE;
# Line 7129  for (;;) Line 7277  for (;;)
7277      if (*last_branch != OP_ALT)      if (*last_branch != OP_ALT)
7278        {        {
7279        firstchar = branchfirstchar;        firstchar = branchfirstchar;
7280          firstcharflags = branchfirstcharflags;
7281        reqchar = branchreqchar;        reqchar = branchreqchar;
7282          reqcharflags = branchreqcharflags;
7283        }        }
7284    
7285      /* If this is not the first branch, the first char and reqchar have to      /* If this is not the first branch, the first char and reqchar have to
# Line 7143  for (;;) Line 7293  for (;;)
7293        we have to abandon the firstchar for the regex, but if there was        we have to abandon the firstchar for the regex, but if there was
7294        previously no reqchar, it takes on the value of the old firstchar. */        previously no reqchar, it takes on the value of the old firstchar. */
7295    
7296        if (firstchar >= 0 && firstchar != branchfirstchar)        if (firstcharflags >= 0 &&
7297              (firstcharflags != branchfirstcharflags || firstchar != branchfirstchar))
7298          {          {
7299          if (reqchar < 0) reqchar = firstchar;          if (reqcharflags < 0)
7300          firstchar = REQ_NONE;            {
7301              reqchar = firstchar;
7302              reqcharflags = firstcharflags;
7303              }
7304            firstcharflags = REQ_NONE;
7305          }          }
7306    
7307        /* If we (now or from before) have no firstchar, a firstchar from the        /* If we (now or from before) have no firstchar, a firstchar from the
7308        branch becomes a reqchar if there isn't a branch reqchar. */        branch becomes a reqchar if there isn't a branch reqchar. */
7309    
7310        if (firstchar < 0 && branchfirstchar >= 0 && branchreqchar < 0)        if (firstcharflags < 0 && branchfirstcharflags >= 0 && branchreqcharflags < 0)
7311            branchreqchar = branchfirstchar;          {
7312            branchreqchar = branchfirstchar;
7313            branchreqcharflags = branchfirstcharflags;
7314            }
7315    
7316        /* Now ensure that the reqchars match */        /* Now ensure that the reqchars match */
7317    
7318        if ((reqchar & ~REQ_VARY) != (branchreqchar & ~REQ_VARY))        if (((reqcharflags & ~REQ_VARY) != (branchreqcharflags & ~REQ_VARY)) ||
7319          reqchar = REQ_NONE;            reqchar != branchreqchar)
7320        else reqchar |= branchreqchar;   /* To "or" REQ_VARY */          reqcharflags = REQ_NONE;
7321          else
7322            {
7323            reqchar = branchreqchar;
7324            reqcharflags |= branchreqcharflags; /* To "or" REQ_VARY */
7325            }
7326        }        }
7327    
7328      /* If lookbehind, check that this branch matches a fixed-length string, and      /* If lookbehind, check that this branch matches a fixed-length string, and
# Line 7255  for (;;) Line 7418  for (;;)
7418      *codeptr = code;      *codeptr = code;
7419      *ptrptr = ptr;      *ptrptr = ptr;
7420      *firstcharptr = firstchar;      *firstcharptr = firstchar;
7421        *firstcharflagsptr = firstcharflags;
7422      *reqcharptr = reqchar;      *reqcharptr = reqchar;
7423        *reqcharflagsptr = reqcharflags;
7424      if (lengthptr != NULL)      if (lengthptr != NULL)
7425        {        {
7426        if (OFLOW_MAX - *lengthptr < length)        if (OFLOW_MAX - *lengthptr < length)
# Line 7490  do { Line 7655  do {
7655       {       {
7656       if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;       if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
7657       }       }
7658    
7659     /* Atomic brackets */     /* Atomic brackets */
7660    
7661     else if (op == OP_ONCE || op == OP_ONCE_NC)     else if (op == OP_ONCE || op == OP_ONCE_NC)
# Line 7542  we return that char, otherwise -1. Line 7707  we return that char, otherwise -1.
7707    
7708  Arguments:  Arguments:
7709    code       points to start of expression (the bracket)    code       points to start of expression (the bracket)
7710      flags       points to the first char flags, or to REQ_NONE
7711    inassert   TRUE if in an assertion    inassert   TRUE if in an assertion
7712    
7713  Returns:     -1 or the fixed first char  Returns:     the fixed first char, or 0 with REQ_NONE in flags
7714  */  */
7715    
7716  static int  static pcre_uint32
7717  find_firstassertedchar(const pcre_uchar *code, BOOL inassert)  find_firstassertedchar(const pcre_uchar *code, pcre_int32 *flags,
7718      BOOL inassert)
7719  {  {
7720  register int c = -1;  register pcre_uint32 c = 0;
7721    int cflags = REQ_NONE;
7722    
7723    *flags = REQ_NONE;
7724  do {  do {
7725     int d;     pcre_uint32 d;
7726       int dflags;
7727     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
7728               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
7729     const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,     const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
7730       TRUE);       TRUE);
7731     register int op = *scode;     register pcre_uchar op = *scode;
7732    
7733     switch(op)     switch(op)
7734       {       {
7735       default:       default:
7736       return -1;       return 0;
7737    
7738       case OP_BRA:       case OP_BRA:
7739       case OP_BRAPOS:       case OP_BRAPOS:
# Line 7574  do { Line 7745  do {
7745       case OP_ONCE:       case OP_ONCE:
7746       case OP_ONCE_NC:       case OP_ONCE_NC:
7747       case OP_COND:       case OP_COND:
7748       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)       d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT);
7749         return -1;       if (dflags < 0)
7750       if (c < 0) c = d; else if (c != d) return -1;         return 0;
7751         if (cflags < 0) { c = d; cflags = dflags; } else if (c != d || cflags != dflags) return 0;
7752       break;       break;
7753    
7754       case OP_EXACT:       case OP_EXACT:
# Line 7587  do { Line 7759  do {
7759       case OP_PLUS:       case OP_PLUS:
7760       case OP_MINPLUS:       case OP_MINPLUS:
7761       case OP_POSPLUS:       case OP_POSPLUS:
7762       if (!inassert) return -1;       if (!inassert) return 0;
7763       if (c < 0) c = scode[1];       if (cflags < 0) { c = scode[1]; cflags = 0; }
7764         else if (c != scode[1]) return -1;         else if (c != scode[1]) return 0;
7765       break;       break;
7766    
7767       case OP_EXACTI:       case OP_EXACTI:
# Line 7600  do { Line 7772  do {
7772       case OP_PLUSI:       case OP_PLUSI:
7773       case OP_MINPLUSI:       case OP_MINPLUSI:
7774       case OP_POSPLUSI:       case OP_POSPLUSI:
7775       if (!inassert) return -1;       if (!inassert) return 0;
7776       if (c < 0) c = scode[1] | REQ_CASELESS;       if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
7777         else if (c != scode[1]) return -1;         else if (c != scode[1]) return 0;
7778       break;       break;
7779       }       }
7780    
7781     code += GET(code, 1);     code += GET(code, 1);
7782     }     }
7783  while (*code == OP_ALT);  while (*code == OP_ALT);
7784    
7785    *flags = cflags;
7786  return c;  return c;
7787  }  }
7788    
# Line 7676  pcre32_compile2(PCRE_SPTR32 pattern, int Line 7850  pcre32_compile2(PCRE_SPTR32 pattern, int
7850  {  {
7851  REAL_PCRE *re;  REAL_PCRE *re;
7852  int length = 1;  /* For final END opcode */  int length = 1;  /* For final END opcode */
7853  pcre_int32 firstchar, reqchar;  pcre_int32 firstcharflags, reqcharflags;
7854    pcre_uint32 firstchar, reqchar;
7855    pcre_uint32 limit_match = PCRE_UINT32_MAX;
7856    pcre_uint32 limit_recursion = PCRE_UINT32_MAX;
7857  int newline;  int newline;
7858  int errorcode = 0;  int errorcode = 0;
7859  int skipatstart = 0;  int skipatstart = 0;
7860  BOOL utf;  BOOL utf;
7861    BOOL never_utf = FALSE;
7862  size_t size;  size_t size;
7863  pcre_uchar *code;  pcre_uchar *code;
7864  const pcre_uchar *codestart;  const pcre_uchar *codestart;
# Line 7740  if ((options & ~PUBLIC_COMPILE_OPTIONS) Line 7918  if ((options & ~PUBLIC_COMPILE_OPTIONS)
7918    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN;
7919    }    }
7920    
7921    /* If PCRE_NEVER_UTF is set, remember it. */
7922    
7923    if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE;
7924    
7925  /* Check for global one-time settings at the start of the pattern, and remember  /* Check for global one-time settings at the start of the pattern, and remember
7926  the offset for later. */  the offset for later. */
7927    
7928    cd->external_flags = 0;   /* Initialize here for LIMIT_MATCH/RECURSION */
7929    
7930  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
7931         ptr[skipatstart+1] == CHAR_ASTERISK)         ptr[skipatstart+1] == CHAR_ASTERISK)
7932    {    {
7933    int newnl = 0;    int newnl = 0;
7934    int newbsr = 0;    int newbsr = 0;
7935    
7936    /* For completeness and backward compatibility, (*UTFn) is supported in the
7937    relevant libraries, but (*UTF) is generic and always supported. Note that
7938    PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
7939    
7940  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
7941    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)
7942      { skipatstart += 7; options |= PCRE_UTF8; continue; }      { skipatstart += 7; options |= PCRE_UTF8; continue; }
7943  #endif  #endif
7944  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
7945    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF16_RIGHTPAR, 6) == 0)
7946      { skipatstart += 8; options |= PCRE_UTF16; continue; }      { skipatstart += 8; options |= PCRE_UTF16; continue; }
7947  #endif  #endif
7948  #ifdef COMPILE_PCRE32  #ifdef COMPILE_PCRE32
7949    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF32_RIGHTPAR, 6) == 0)
7950      { skipatstart += 8; options |= PCRE_UTF32; continue; }      { skipatstart += 8; options |= PCRE_UTF32; continue; }
7951  #endif  #endif
7952    
7953      else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 4) == 0)
7954        { skipatstart += 6; options |= PCRE_UTF8; continue; }
7955    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
7956      { skipatstart += 6; options |= PCRE_UCP; continue; }      { skipatstart += 6; options |= PCRE_UCP; continue; }
7957    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
7958      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
7959    
7960      else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0)
7961        {
7962        pcre_uint32 c = 0;
7963        int p = skipatstart + 14;
7964        while (isdigit(ptr[p]))
7965          {
7966          if (c > PCRE_UINT32_MAX / 10 - 1) break;   /* Integer overflow */
7967          c = c*10 + ptr[p++] - CHAR_0;
7968          }
7969        if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
7970        if (c < limit_match)
7971          {
7972          limit_match = c;
7973          cd->external_flags |= PCRE_MLSET;
7974          }
7975        skipatstart = p;
7976        continue;
7977        }
7978    
7979      else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0)
7980        {
7981        pcre_uint32 c = 0;
7982        int p = skipatstart + 18;
7983        while (isdigit(ptr[p]))
7984          {
7985          if (c > PCRE_UINT32_MAX / 10 - 1) break;   /* Integer overflow check */
7986          c = c*10 + ptr[p++] - CHAR_0;
7987          }
7988        if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
7989        if (c < limit_recursion)
7990          {
7991          limit_recursion = c;
7992          cd->external_flags |= PCRE_RLSET;
7993          }
7994        skipatstart = p;
7995        continue;
7996        }
7997    
7998    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
7999      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
8000    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)
# Line 7791  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 8020  while (ptr[skipatstart] == CHAR_LEFT_PAR
8020    
8021  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
8022  utf = (options & PCRE_UTF8) != 0;  utf = (options & PCRE_UTF8) != 0;
8023    if (utf && never_utf)
8024      {
8025      errorcode = ERR78;
8026      goto PCRE_EARLY_ERROR_RETURN2;
8027      }
8028    
8029  /* Can't support UTF unless PCRE has been compiled to include the code. The  /* Can't support UTF unless PCRE has been compiled to include the code. The
8030  return of an error code from PRIV(valid_utf)() is a new feature, introduced in  return of an error code from PRIV(valid_utf)() is a new feature, introduced in
# Line 7913  cd->req_varyopt = 0; Line 8147  cd->req_varyopt = 0;
8147  cd->assert_depth = 0;  cd->assert_depth = 0;
8148  cd->max_lookbehind = 0;  cd->max_lookbehind = 0;
8149  cd->external_options = options;  cd->external_options = options;
 cd->external_flags = 0;  
8150  cd->open_caps = NULL;  cd->open_caps = NULL;
8151    
8152  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
# Line 7926  ptr += skipatstart; Line 8159  ptr += skipatstart;
8159  code = cworkspace;  code = cworkspace;
8160  *code = OP_BRA;  *code = OP_BRA;
8161  (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,  (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
8162    FALSE, 0, 0, &firstchar, &reqchar, NULL, cd, &length);    FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL,
8163      cd, &length);
8164  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
8165    
8166  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
# Line 7962  re->magic_number = MAGIC_NUMBER; Line 8196  re->magic_number = MAGIC_NUMBER;
8196  re->size = (int)size;  re->size = (int)size;
8197  re->options = cd->external_options;  re->options = cd->external_options;
8198  re->flags = cd->external_flags;  re->flags = cd->external_flags;
8199    re->limit_match = limit_match;
8200    re->limit_recursion = limit_recursion;
8201  re->first_char = 0;  re->first_char = 0;
8202  re->req_char = 0;  re->req_char = 0;
8203  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
# Line 7971  re->ref_count = 0; Line 8207  re->ref_count = 0;
8207  re->tables = (tables == PRIV(default_tables))? NULL : tables;  re->tables = (tables == PRIV(default_tables))? NULL : tables;
8208  re->nullpad = NULL;  re->nullpad = NULL;
8209  #ifdef COMPILE_PCRE32  #ifdef COMPILE_PCRE32
8210  re->dummy1 = re->dummy2 = 0;  re->dummy = 0;
8211    #else
8212    re->dummy1 = re->dummy2 = re->dummy3 = 0;
8213  #endif  #endif
8214    
8215  /* The starting points of the name/number translation table and of the code are  /* The starting points of the name/number translation table and of the code are
# Line 8004  ptr = (const pcre_uchar *)pattern + skip Line 8242  ptr = (const pcre_uchar *)pattern + skip
8242  code = (pcre_uchar *)codestart;  code = (pcre_uchar *)codestart;
8243  *code = OP_BRA;  *code = OP_BRA;
8244  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
8245    &firstchar, &reqchar, NULL, cd, NULL);    &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, cd, NULL);
8246  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
8247  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
8248  re->max_lookbehind = cd->max_lookbehind;  re->max_lookbehind = cd->max_lookbehind;
8249  re->flags = cd->external_flags | PCRE_MODE;  re->flags = cd->external_flags | PCRE_MODE;
8250    
8251  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */  if (cd->had_accept)
8252      {
8253      reqchar = 0;              /* Must disable after (*ACCEPT) */
8254      reqcharflags = REQ_NONE;
8255      }
8256    
8257  /* If not reached end of pattern on success, there's an excess bracket. */  /* If not reached end of pattern on success, there's an excess bracket. */
8258    
8259  if (errorcode == 0 && *ptr != 0) errorcode = ERR22;  if (errorcode == 0 && *ptr != CHAR_NULL) errorcode = ERR22;
8260    
8261  /* Fill in the terminating state and check for disastrous overflow, but  /* Fill in the terminating state and check for disastrous overflow, but
8262  if debugging, leave the test till after things are printed out. */  if debugging, leave the test till after things are printed out. */
# Line 8025  if debugging, leave the test till after Line 8267  if debugging, leave the test till after
8267  if (code - codestart > length) errorcode = ERR23;  if (code - codestart > length) errorcode = ERR23;
8268  #endif  #endif
8269    
8270    #ifdef SUPPORT_VALGRIND
8271    /* If the estimated length exceeds the really used length, mark the extra
8272    allocated memory as unaddressable, so that any out-of-bound reads can be
8273    detected. */
8274    VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar));
8275    #endif
8276    
8277  /* Fill in any forward references that are required. There may be repeated  /* Fill in any forward references that are required. There may be repeated
8278  references; optimize for them, as searching a large regex takes time. */  references; optimize for them, as searching a large regex takes time. */
8279    
# Line 8048  if (cd->hwm > cd->start_workspace) Line 8297  if (cd->hwm > cd->start_workspace)
8297      }      }
8298    }    }
8299    
8300  /* If the workspace had to be expanded, free the new memory. */  /* If the workspace had to be expanded, free the new memory. Set the pointer to
8301    NULL to indicate that forward references have been filled in. */
8302    
8303  if (cd->workspace_size > COMPILE_WORK_SIZE)  if (cd->workspace_size > COMPILE_WORK_SIZE)
8304    (PUBL(free))((void *)cd->start_workspace);    (PUBL(free))((void *)cd->start_workspace);
8305    cd->start_workspace = NULL;
8306    
8307  /* Give an error if there's a back reference to a non-existent capturing  /* Give an error if there's a back reference to a non-existent capturing
8308  subpattern. */  subpattern. */
# Line 8060  if (errorcode == 0 && re->top_backref > Line 8311  if (errorcode == 0 && re->top_backref >
8311    
8312  /* If there were any lookbehind assertions that contained OP_RECURSE  /* If there were any lookbehind assertions that contained OP_RECURSE
8313  (recursions or subroutine calls), a flag is set for them to be checked here,  (recursions or subroutine calls), a flag is set for them to be checked here,
8314  because they may contain forward references. Actual recursions can't be fixed  because they may contain forward references. Actual recursions cannot be fixed
8315  length, but subroutine calls can. It is done like this so that those without  length, but subroutine calls can. It is done like this so that those without
8316  OP_RECURSE that are not fixed length get a diagnostic with a useful offset. The  OP_RECURSE that are not fixed length get a diagnostic with a useful offset. The
8317  exceptional ones forgo this. We scan the pattern to check that they are fixed  exceptional ones forgo this. We scan the pattern to check that they are fixed
# Line 8131  if ((re->options & PCRE_ANCHORED) == 0) Line 8382  if ((re->options & PCRE_ANCHORED) == 0)
8382    if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED;    if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED;
8383    else    else
8384      {      {
8385      if (firstchar < 0)      if (firstcharflags < 0)
8386        firstchar = find_firstassertedchar(codestart, FALSE);        firstchar = find_firstassertedchar(codestart, &firstcharflags, FALSE);
8387      if (firstchar >= 0)   /* Remove caseless flag for non-caseable chars */      if (firstcharflags >= 0)   /* Remove caseless flag for non-caseable chars */
8388        {        {
8389  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
8390        re->first_char = firstchar & 0xff;        re->first_char = firstchar & 0xff;
8391  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16
8392        re->first_char = firstchar & 0xffff;        re->first_char = firstchar & 0xffff;
8393  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
8394        re->first_char = firstchar & ~REQ_MASK;        re->first_char = firstchar;
8395  #endif  #endif
8396        if ((firstchar & REQ_CASELESS) != 0)        if ((firstcharflags & REQ_CASELESS) != 0)
8397          {          {
8398  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
8399          /* We ignore non-ASCII first chars in 8 bit mode. */          /* We ignore non-ASCII first chars in 8 bit mode. */
# Line 8174  if ((re->options & PCRE_ANCHORED) == 0) Line 8425  if ((re->options & PCRE_ANCHORED) == 0)
8425  variable length item in the regex. Remove the caseless flag for non-caseable  variable length item in the regex. Remove the caseless flag for non-caseable
8426  bytes. */  bytes. */
8427    
8428  if (reqchar >= 0 &&  if (reqcharflags >= 0 &&
8429       ((re->options & PCRE_ANCHORED) == 0 || (reqchar & REQ_VARY) != 0))       ((re->options & PCRE_ANCHORED) == 0 || (reqcharflags & REQ_VARY) != 0))
8430    {    {
8431  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
8432    re->req_char = reqchar & 0xff;    re->req_char = reqchar & 0xff;
8433  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16
8434    re->req_char = reqchar & 0xffff;    re->req_char = reqchar & 0xffff;
8435  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
8436    re->req_char = reqchar & ~REQ_MASK;    re->req_char = reqchar;
8437  #endif  #endif
8438    if ((reqchar & REQ_CASELESS) != 0)    if ((reqcharflags & REQ_CASELESS) != 0)
8439      {      {
8440  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
8441      /* We ignore non-ASCII first chars in 8 bit mode. */      /* We ignore non-ASCII first chars in 8 bit mode. */
# Line 8255  if (code - codestart > length) Line 8506  if (code - codestart > length)
8506    }    }
8507  #endif   /* PCRE_DEBUG */  #endif   /* PCRE_DEBUG */
8508    
8509    /* Check for a pattern that can match an empty string, so that this information
8510    can be provided to applications. */
8511    
8512    do
8513      {
8514      if (could_be_empty_branch(codestart, code, utf, cd, NULL))
8515        {
8516        re->flags |= PCRE_MATCH_EMPTY;
8517        break;
8518        }
8519      codestart += GET(codestart, 1);
8520      }
8521    while (*codestart == OP_ALT);
8522    
8523  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
8524  return (pcre *)re;  return (pcre *)re;
8525  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16

Legend:
Removed from v.1071  
changed lines
  Added in v.1348

  ViewVC Help
Powered by ViewVC 1.1.5