/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1133 by chpe, Thu Oct 18 18:35:30 2012 UTC | revision 1348 by ph10, Fri Jul 5 10:38:37 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 80  to check them every time. */ Line 80  to check them every time. */
80  /* Definitions to allow mutual recursion */  /* Definitions to allow mutual recursion */
81    
82  static int  static int
83    add_list_to_class(pcre_uint8 *, pcre_uchar **, int, compile_data *,    add_list_to_class(pcre_uint8 *, pcre_uchar **, int, compile_data *,
84      const pcre_uint32 *, unsigned int);      const pcre_uint32 *, unsigned int);
85    
86  static BOOL  static BOOL
87    compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,    compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
88      pcre_uint32 *, pcre_int32 *, pcre_uint32 *, pcre_int32 *, branch_chain *,      pcre_uint32 *, pcre_int32 *, pcre_uint32 *, pcre_int32 *, branch_chain *,
89      compile_data *, int *);      compile_data *, int *);
90    
91    
# Line 487  static const char error_texts[] = Line 487  static const char error_texts[] =
487    "a numbered reference must not be zero\0"    "a numbered reference must not be zero\0"
488    "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"    "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
489    /* 60 */    /* 60 */
490    "(*VERB) not recognized\0"    "(*VERB) not recognized or malformed\0"
491    "number is too big\0"    "number is too big\0"
492    "subpattern name expected\0"    "subpattern name expected\0"
493    "digit expected after (?+\0"    "digit expected after (?+\0"
# Line 508  static const char error_texts[] = Line 508  static const char error_texts[] =
508    "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"    "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
509    "character value in \\u.... sequence is too large\0"    "character value in \\u.... sequence is too large\0"
510    "invalid UTF-32 string\0"    "invalid UTF-32 string\0"
511      "setting UTF is disabled by the application\0"
512    ;    ;
513    
514  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 667  find_error_text(int n) Line 668  find_error_text(int n)
668  const char *s = error_texts;  const char *s = error_texts;
669  for (; n > 0; n--)  for (; n > 0; n--)
670    {    {
671    while (*s++ != 0) {};    while (*s++ != CHAR_NULL) {};
672    if (*s == 0) return "Error text not found (please report)";    if (*s == CHAR_NULL) return "Error text not found (please report)";
673    }    }
674  return s;  return s;
675  }  }
# Line 773  Returns:         zero => a data characte Line 774  Returns:         zero => a data characte
774  */  */
775    
776  static int  static int
777  check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr,  check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr,
778    int bracount, int options, BOOL isclass)    int bracount, int options, BOOL isclass)
779  {  {
780  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
# Line 788  ptr--;                            /* Set Line 789  ptr--;                            /* Set
789    
790  /* If backslash is at the end of the pattern, it's an error. */  /* If backslash is at the end of the pattern, it's an error. */
791    
792  if (c == 0) *errorcodeptr = ERR1;  if (c == CHAR_NULL) *errorcodeptr = ERR1;
793    
794  /* Non-alphanumerics are literals. For digits or letters, do an initial lookup  /* Non-alphanumerics are literals. For digits or letters, do an initial lookup
795  in a table. A non-zero result is something that can be returned immediately.  in a table. A non-zero result is something that can be returned immediately.
# Line 797  Otherwise further processing may be requ Line 798  Otherwise further processing may be requ
798  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
799  /* Not alphanumeric */  /* Not alphanumeric */
800  else if (c < CHAR_0 || c > CHAR_z) {}  else if (c < CHAR_0 || c > CHAR_z) {}
801  else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }  else if ((i = escapes[c - CHAR_0]) != 0)
802      { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
803    
804  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
805  /* Not alphanumeric */  /* Not alphanumeric */
# Line 847  else Line 849  else
849            }            }
850    
851  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
852          if (c > (utf ? 0x10ffff : 0xff))          if (c > (utf ? 0x10ffffU : 0xffU))
853  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16
854          if (c > (utf ? 0x10ffff : 0xffff))          if (c > (utf ? 0x10ffffU : 0xffffU))
855  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
856          if (utf && c > 0x10ffff)          if (utf && c > 0x10ffffU)
857  #endif  #endif
858            {            {
859            *errorcodeptr = ERR76;            *errorcodeptr = ERR76;
# Line 898  else Line 900  else
900      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
901        {        {
902        const pcre_uchar *p;        const pcre_uchar *p;
903        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)        for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
904          if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;          if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
905        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)        if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET)
906          {          {
907          escape = ESC_k;          escape = ESC_k;
908          break;          break;
# Line 1085  else Line 1087  else
1087  #endif  #endif
1088    
1089  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
1090          if (c > (utf ? 0x10ffff : 0xff)) { overflow = TRUE; break; }          if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1091  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16
1092          if (c > (utf ? 0x10ffff : 0xffff)) { overflow = TRUE; break; }          if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1093  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
1094          if (utf && c > 0x10ffff) { overflow = TRUE; break; }          if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1095  #endif  #endif
1096          }          }
1097    
# Line 1134  else Line 1136  else
1136    
1137      case CHAR_c:      case CHAR_c:
1138      c = *(++ptr);      c = *(++ptr);
1139      if (c == 0)      if (c == CHAR_NULL)
1140        {        {
1141        *errorcodeptr = ERR2;        *errorcodeptr = ERR2;
1142        break;        break;
# Line 1211  Returns:         TRUE if the type value Line 1213  Returns:         TRUE if the type value
1213  */  */
1214    
1215  static BOOL  static BOOL
1216  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, unsigned int *ptypeptr,  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, unsigned int *ptypeptr,
1217    unsigned int *pdataptr, int *errorcodeptr)    unsigned int *pdataptr, int *errorcodeptr)
1218  {  {
1219  pcre_uchar c;  pcre_uchar c;
# Line 1220  const pcre_uchar *ptr = *ptrptr; Line 1222  const pcre_uchar *ptr = *ptrptr;
1222  pcre_uchar name[32];  pcre_uchar name[32];
1223    
1224  c = *(++ptr);  c = *(++ptr);
1225  if (c == 0) goto ERROR_RETURN;  if (c == CHAR_NULL) goto ERROR_RETURN;
1226    
1227  *negptr = FALSE;  *negptr = FALSE;
1228    
# Line 1237  if (c == CHAR_LEFT_CURLY_BRACKET) Line 1239  if (c == CHAR_LEFT_CURLY_BRACKET)
1239    for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++)    for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++)
1240      {      {
1241      c = *(++ptr);      c = *(++ptr);
1242      if (c == 0) goto ERROR_RETURN;      if (c == CHAR_NULL) goto ERROR_RETURN;
1243      if (c == CHAR_RIGHT_CURLY_BRACKET) break;      if (c == CHAR_RIGHT_CURLY_BRACKET) break;
1244      name[i] = c;      name[i] = c;
1245      }      }
# Line 1408  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1410  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1410    {    {
1411    /* Handle specials such as (*SKIP) or (*UTF8) etc. */    /* Handle specials such as (*SKIP) or (*UTF8) etc. */
1412    
1413    if (ptr[1] == CHAR_ASTERISK) ptr += 2;    if (ptr[1] == CHAR_ASTERISK)
1414        {
1415        ptr += 2;
1416        while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1417        }
1418    
1419    /* Handle a normal, unnamed capturing parenthesis. */    /* Handle a normal, unnamed capturing parenthesis. */
1420    
# Line 1432  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1438  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1438    
1439    else if (ptr[2] == CHAR_NUMBER_SIGN)    else if (ptr[2] == CHAR_NUMBER_SIGN)
1440      {      {
1441      for (ptr += 3; *ptr != 0; ptr++) if (*ptr == CHAR_RIGHT_PARENTHESIS) break;      for (ptr += 3; *ptr != CHAR_NULL; ptr++)
1442          if (*ptr == CHAR_RIGHT_PARENTHESIS) break;
1443      goto FAIL_EXIT;      goto FAIL_EXIT;
1444      }      }
1445    
# Line 1445  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1452  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1452      ptr += 2;      ptr += 2;
1453      if (ptr[1] != CHAR_QUESTION_MARK)      if (ptr[1] != CHAR_QUESTION_MARK)
1454        {        {
1455        while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;        while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1456        if (*ptr != 0) ptr++;        if (*ptr != CHAR_NULL) ptr++;
1457        }        }
1458      }      }
1459    
# Line 1489  for (; ptr < cd->end_pattern; ptr++) Line 1496  for (; ptr < cd->end_pattern; ptr++)
1496    
1497    if (*ptr == CHAR_BACKSLASH)    if (*ptr == CHAR_BACKSLASH)
1498      {      {
1499      if (*(++ptr) == 0) goto FAIL_EXIT;      if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT;
1500      if (*ptr == CHAR_Q) for (;;)      if (*ptr == CHAR_Q) for (;;)
1501        {        {
1502        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};        while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {};
1503        if (*ptr == 0) goto FAIL_EXIT;        if (*ptr == CHAR_NULL) goto FAIL_EXIT;
1504        if (*(++ptr) == CHAR_E) break;        if (*(++ptr) == CHAR_E) break;
1505        }        }
1506      continue;      continue;
# Line 1537  for (; ptr < cd->end_pattern; ptr++) Line 1544  for (; ptr < cd->end_pattern; ptr++)
1544    
1545      while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)      while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
1546        {        {
1547        if (*ptr == 0) return -1;        if (*ptr == CHAR_NULL) return -1;
1548        if (*ptr == CHAR_BACKSLASH)        if (*ptr == CHAR_BACKSLASH)
1549          {          {
1550          if (*(++ptr) == 0) goto FAIL_EXIT;          if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT;
1551          if (*ptr == CHAR_Q) for (;;)          if (*ptr == CHAR_Q) for (;;)
1552            {            {
1553            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};            while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {};
1554            if (*ptr == 0) goto FAIL_EXIT;            if (*ptr == CHAR_NULL) goto FAIL_EXIT;
1555            if (*(++ptr) == CHAR_E) break;            if (*(++ptr) == CHAR_E) break;
1556            }            }
1557          continue;          continue;
# Line 1558  for (; ptr < cd->end_pattern; ptr++) Line 1565  for (; ptr < cd->end_pattern; ptr++)
1565    if (xmode && *ptr == CHAR_NUMBER_SIGN)    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1566      {      {
1567      ptr++;      ptr++;
1568      while (*ptr != 0)      while (*ptr != CHAR_NULL)
1569        {        {
1570        if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }        if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
1571        ptr++;        ptr++;
# Line 1566  for (; ptr < cd->end_pattern; ptr++) Line 1573  for (; ptr < cd->end_pattern; ptr++)
1573        if (utf) FORWARDCHAR(ptr);        if (utf) FORWARDCHAR(ptr);
1574  #endif  #endif
1575        }        }
1576      if (*ptr == 0) goto FAIL_EXIT;      if (*ptr == CHAR_NULL) goto FAIL_EXIT;
1577      continue;      continue;
1578      }      }
1579    
# Line 1576  for (; ptr < cd->end_pattern; ptr++) Line 1583  for (; ptr < cd->end_pattern; ptr++)
1583      {      {
1584      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count);      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count);
1585      if (rc > 0) return rc;      if (rc > 0) return rc;
1586      if (*ptr == 0) goto FAIL_EXIT;      if (*ptr == CHAR_NULL) goto FAIL_EXIT;
1587      }      }
1588    
1589    else if (*ptr == CHAR_RIGHT_PARENTHESIS)    else if (*ptr == CHAR_RIGHT_PARENTHESIS)
# Line 1641  matching closing parens. That is why we Line 1648  matching closing parens. That is why we
1648  for (;;)  for (;;)
1649    {    {
1650    rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count);    rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count);
1651    if (rc > 0 || *ptr++ == 0) break;    if (rc > 0 || *ptr++ == CHAR_NULL) break;
1652    }    }
1653    
1654  return rc;  return rc;
# Line 1878  for (;;) Line 1885  for (;;)
1885    
1886      case OP_TYPEEXACT:      case OP_TYPEEXACT:
1887      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1888      if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP)      if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP)
1889        cc += 2;        cc += 2;
1890      cc += 1 + IMM2_SIZE + 1;      cc += 1 + IMM2_SIZE + 1;
1891      break;      break;
# Line 1914  for (;;) Line 1921  for (;;)
1921    
1922      /* Check a class for variable quantification */      /* Check a class for variable quantification */
1923    
 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
     case OP_XCLASS:  
     cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];  
     /* Fall through */  
 #endif  
   
1924      case OP_CLASS:      case OP_CLASS:
1925      case OP_NCLASS:      case OP_NCLASS:
1926    #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1927        case OP_XCLASS:
1928        /* The original code caused an unsigned overflow in 64 bit systems,
1929        so now we use a conditional statement. */
1930        if (op == OP_XCLASS)
1931          cc += GET(cc, 1);
1932        else
1933          cc += PRIV(OP_lengths)[OP_CLASS];
1934    #else
1935      cc += PRIV(OP_lengths)[OP_CLASS];      cc += PRIV(OP_lengths)[OP_CLASS];
1936    #endif
1937    
1938      switch (*cc)      switch (*cc)
1939        {        {
# Line 2117  for (;;) Line 2128  for (;;)
2128        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
2129        case OP_TYPEEXACT:        case OP_TYPEEXACT:
2130        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
2131        if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)        if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2132          code += 2;          code += 2;
2133        break;        break;
2134    
2135        case OP_MARK:        case OP_MARK:
2136        case OP_PRUNE_ARG:        case OP_PRUNE_ARG:
2137        case OP_SKIP_ARG:        case OP_SKIP_ARG:
       code += code[1];  
       break;  
   
2138        case OP_THEN_ARG:        case OP_THEN_ARG:
2139        code += code[1];        code += code[1];
2140        break;        break;
# Line 2237  for (;;) Line 2245  for (;;)
2245        case OP_TYPEUPTO:        case OP_TYPEUPTO:
2246        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
2247        case OP_TYPEEXACT:        case OP_TYPEEXACT:
2248        if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)        if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2249          code += 2;          code += 2;
2250        break;        break;
2251    
2252        case OP_MARK:        case OP_MARK:
2253        case OP_PRUNE_ARG:        case OP_PRUNE_ARG:
2254        case OP_SKIP_ARG:        case OP_SKIP_ARG:
       code += code[1];  
       break;  
   
2255        case OP_THEN_ARG:        case OP_THEN_ARG:
2256        code += code[1];        code += code[1];
2257        break;        break;
# Line 2348  Arguments: Line 2353  Arguments:
2353    endcode     points to where to stop    endcode     points to where to stop
2354    utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode    utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
2355    cd          contains pointers to tables etc.    cd          contains pointers to tables etc.
2356      recurses    chain of recurse_check to catch mutual recursion
2357    
2358  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
2359  */  */
2360    
2361    typedef struct recurse_check {
2362      struct recurse_check *prev;
2363      const pcre_uchar *group;
2364    } recurse_check;
2365    
2366  static BOOL  static BOOL
2367  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2368    BOOL utf, compile_data *cd)    BOOL utf, compile_data *cd, recurse_check *recurses)
2369  {  {
2370  register pcre_uchar c;  register pcre_uchar c;
2371    recurse_check this_recurse;
2372    
2373  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
2374       code < endcode;       code < endcode;
2375       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
# Line 2364  for (code = first_significant_code(code Line 2377  for (code = first_significant_code(code
2377    const pcre_uchar *ccode;    const pcre_uchar *ccode;
2378    
2379    c = *code;    c = *code;
2380    
2381    /* Skip over forward assertions; the other assertions are skipped by    /* Skip over forward assertions; the other assertions are skipped by
2382    first_significant_code() with a TRUE final argument. */    first_significant_code() with a TRUE final argument. */
2383    
# Line 2384  for (code = first_significant_code(code Line 2397  for (code = first_significant_code(code
2397    
2398    if (c == OP_RECURSE)    if (c == OP_RECURSE)
2399      {      {
2400      const pcre_uchar *scode;      const pcre_uchar *scode = cd->start_code + GET(code, 1);
2401      BOOL empty_branch;      BOOL empty_branch;
2402    
2403      /* Test for forward reference */      /* Test for forward reference or uncompleted reference. This is disabled
2404        when called to scan a completed pattern by setting cd->start_workspace to
2405      for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)      NULL. */
2406        if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;  
2407        if (cd->start_workspace != NULL)
2408          {
2409          const pcre_uchar *tcode;
2410          for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
2411            if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
2412          if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
2413          }
2414    
2415        /* If we are scanning a completed pattern, there are no forward references
2416        and all groups are complete. We need to detect whether this is a recursive
2417        call, as otherwise there will be an infinite loop. If it is a recursion,
2418        just skip over it. Simple recursions are easily detected. For mutual
2419        recursions we keep a chain on the stack. */
2420    
2421        else
2422          {
2423          recurse_check *r = recurses;
2424          const pcre_uchar *endgroup = scode;
2425    
2426          do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
2427          if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
2428    
2429          for (r = recurses; r != NULL; r = r->prev)
2430            if (r->group == scode) break;
2431          if (r != NULL) continue;   /* Mutual recursion */
2432          }
2433    
2434      /* Not a forward reference, test for completed backward reference */      /* Completed reference; scan the referenced group, remembering it on the
2435        stack chain to detect mutual recursions. */
2436    
2437      empty_branch = FALSE;      empty_branch = FALSE;
2438      scode = cd->start_code + GET(code, 1);      this_recurse.prev = recurses;
2439      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */      this_recurse.group = scode;
2440    
     /* Completed backwards reference */  
   
2441      do      do
2442        {        {
2443        if (could_be_empty_branch(scode, endcode, utf, cd))        if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
2444          {          {
2445          empty_branch = TRUE;          empty_branch = TRUE;
2446          break;          break;
# Line 2458  for (code = first_significant_code(code Line 2496  for (code = first_significant_code(code
2496        empty_branch = FALSE;        empty_branch = FALSE;
2497        do        do
2498          {          {
2499          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
2500            empty_branch = TRUE;            empty_branch = TRUE;
2501          code += GET(code, 1);          code += GET(code, 1);
2502          }          }
# Line 2516  for (code = first_significant_code(code Line 2554  for (code = first_significant_code(code
2554    
2555      /* Opcodes that must match a character */      /* Opcodes that must match a character */
2556    
2557        case OP_ANY:
2558        case OP_ALLANY:
2559        case OP_ANYBYTE:
2560    
2561      case OP_PROP:      case OP_PROP:
2562      case OP_NOTPROP:      case OP_NOTPROP:
2563        case OP_ANYNL:
2564    
2565        case OP_NOT_HSPACE:
2566        case OP_HSPACE:
2567        case OP_NOT_VSPACE:
2568        case OP_VSPACE:
2569      case OP_EXTUNI:      case OP_EXTUNI:
2570    
2571      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
2572      case OP_DIGIT:      case OP_DIGIT:
2573      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
2574      case OP_WHITESPACE:      case OP_WHITESPACE:
2575      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
2576      case OP_WORDCHAR:      case OP_WORDCHAR:
2577      case OP_ANY:  
     case OP_ALLANY:  
     case OP_ANYBYTE:  
2578      case OP_CHAR:      case OP_CHAR:
2579      case OP_CHARI:      case OP_CHARI:
2580      case OP_NOT:      case OP_NOT:
2581      case OP_NOTI:      case OP_NOTI:
2582    
2583      case OP_PLUS:      case OP_PLUS:
2584        case OP_PLUSI:
2585      case OP_MINPLUS:      case OP_MINPLUS:
2586      case OP_POSPLUS:      case OP_MINPLUSI:
2587      case OP_EXACT:  
2588      case OP_NOTPLUS:      case OP_NOTPLUS:
2589        case OP_NOTPLUSI:
2590      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
2591        case OP_NOTMINPLUSI:
2592    
2593        case OP_POSPLUS:
2594        case OP_POSPLUSI:
2595      case OP_NOTPOSPLUS:      case OP_NOTPOSPLUS:
2596        case OP_NOTPOSPLUSI:
2597    
2598        case OP_EXACT:
2599        case OP_EXACTI:
2600      case OP_NOTEXACT:      case OP_NOTEXACT:
2601        case OP_NOTEXACTI:
2602    
2603      case OP_TYPEPLUS:      case OP_TYPEPLUS:
2604      case OP_TYPEMINPLUS:      case OP_TYPEMINPLUS:
2605      case OP_TYPEPOSPLUS:      case OP_TYPEPOSPLUS:
2606      case OP_TYPEEXACT:      case OP_TYPEEXACT:
2607    
2608      return FALSE;      return FALSE;
2609    
2610      /* These are going to continue, as they may be empty, but we have to      /* These are going to continue, as they may be empty, but we have to
# Line 2563  for (code = first_significant_code(code Line 2624  for (code = first_significant_code(code
2624      case OP_TYPEUPTO:      case OP_TYPEUPTO:
2625      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
2626      case OP_TYPEPOSUPTO:      case OP_TYPEPOSUPTO:
2627      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2628        code += 2;        code += 2;
2629      break;      break;
2630    
# Line 2577  for (code = first_significant_code(code Line 2638  for (code = first_significant_code(code
2638      return TRUE;      return TRUE;
2639    
2640      /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,      /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
2641      MINUPTO, and POSUPTO may be followed by a multibyte character */      MINUPTO, and POSUPTO and their caseless and negative versions may be
2642        followed by a multibyte character. */
2643    
2644  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2645      case OP_STAR:      case OP_STAR:
2646      case OP_STARI:      case OP_STARI:
2647        case OP_NOTSTAR:
2648        case OP_NOTSTARI:
2649    
2650      case OP_MINSTAR:      case OP_MINSTAR:
2651      case OP_MINSTARI:      case OP_MINSTARI:
2652        case OP_NOTMINSTAR:
2653        case OP_NOTMINSTARI:
2654    
2655      case OP_POSSTAR:      case OP_POSSTAR:
2656      case OP_POSSTARI:      case OP_POSSTARI:
2657        case OP_NOTPOSSTAR:
2658        case OP_NOTPOSSTARI:
2659    
2660      case OP_QUERY:      case OP_QUERY:
2661      case OP_QUERYI:      case OP_QUERYI:
2662        case OP_NOTQUERY:
2663        case OP_NOTQUERYI:
2664    
2665      case OP_MINQUERY:      case OP_MINQUERY:
2666      case OP_MINQUERYI:      case OP_MINQUERYI:
2667        case OP_NOTMINQUERY:
2668        case OP_NOTMINQUERYI:
2669    
2670      case OP_POSQUERY:      case OP_POSQUERY:
2671      case OP_POSQUERYI:      case OP_POSQUERYI:
2672        case OP_NOTPOSQUERY:
2673        case OP_NOTPOSQUERYI:
2674    
2675      if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);      if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
2676      break;      break;
2677    
2678      case OP_UPTO:      case OP_UPTO:
2679      case OP_UPTOI:      case OP_UPTOI:
2680        case OP_NOTUPTO:
2681        case OP_NOTUPTOI:
2682    
2683      case OP_MINUPTO:      case OP_MINUPTO:
2684      case OP_MINUPTOI:      case OP_MINUPTOI:
2685        case OP_NOTMINUPTO:
2686        case OP_NOTMINUPTOI:
2687    
2688      case OP_POSUPTO:      case OP_POSUPTO:
2689      case OP_POSUPTOI:      case OP_POSUPTOI:
2690        case OP_NOTPOSUPTO:
2691        case OP_NOTPOSUPTOI:
2692    
2693      if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);      if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
2694      break;      break;
2695  #endif  #endif
# Line 2611  for (code = first_significant_code(code Line 2700  for (code = first_significant_code(code
2700      case OP_MARK:      case OP_MARK:
2701      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
2702      case OP_SKIP_ARG:      case OP_SKIP_ARG:
     code += code[1];  
     break;  
   
2703      case OP_THEN_ARG:      case OP_THEN_ARG:
2704      code += code[1];      code += code[1];
2705      break;      break;
# Line 2657  could_be_empty(const pcre_uchar *code, c Line 2743  could_be_empty(const pcre_uchar *code, c
2743  {  {
2744  while (bcptr != NULL && bcptr->current_branch >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2745    {    {
2746    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
2747      return FALSE;      return FALSE;
2748    bcptr = bcptr->outer;    bcptr = bcptr->outer;
2749    }    }
# Line 2713  check_posix_syntax(const pcre_uchar *ptr Line 2799  check_posix_syntax(const pcre_uchar *ptr
2799  {  {
2800  pcre_uchar terminator;          /* Don't combine these lines; the Solaris cc */  pcre_uchar terminator;          /* Don't combine these lines; the Solaris cc */
2801  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
2802  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != CHAR_NULL; ptr++)
2803    {    {
2804    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2805      ptr++;      ptr++;
# Line 2898  PUT(previous_callout, 2 + LINK_SIZE, len Line 2984  PUT(previous_callout, 2 + LINK_SIZE, len
2984  /* This function is passed the start and end of a class range, in UTF-8 mode  /* This function is passed the start and end of a class range, in UTF-8 mode
2985  with UCP support. It searches up the characters, looking for ranges of  with UCP support. It searches up the characters, looking for ranges of
2986  characters in the "other" case. Each call returns the next one, updating the  characters in the "other" case. Each call returns the next one, updating the
2987  start address. A character with multiple other cases is returned on its own  start address. A character with multiple other cases is returned on its own
2988  with a special return value.  with a special return value.
2989    
2990  Arguments:  Arguments:
# Line 2910  Arguments: Line 2996  Arguments:
2996  Yield:        -1 when no more  Yield:        -1 when no more
2997                 0 when a range is returned                 0 when a range is returned
2998                >0 the CASESET offset for char with multiple other cases                >0 the CASESET offset for char with multiple other cases
2999                  in this case, ocptr contains the original                  in this case, ocptr contains the original
3000  */  */
3001    
3002  static int  static int
# Line 2920  get_othercase_range(pcre_uint32 *cptr, p Line 3006  get_othercase_range(pcre_uint32 *cptr, p
3006  pcre_uint32 c, othercase, next;  pcre_uint32 c, othercase, next;
3007  unsigned int co;  unsigned int co;
3008    
3009  /* Find the first character that has an other case. If it has multiple other  /* Find the first character that has an other case. If it has multiple other
3010  cases, return its case offset value. */  cases, return its case offset value. */
3011    
3012  for (c = *cptr; c <= d; c++)  for (c = *cptr; c <= d; c++)
3013    {    {
3014    if ((co = UCD_CASESET(c)) != 0)    if ((co = UCD_CASESET(c)) != 0)
3015      {      {
3016      *ocptr = c++;   /* Character that has the set */      *ocptr = c++;   /* Character that has the set */
3017      *cptr = c;      /* Rest of input range */      *cptr = c;      /* Rest of input range */
3018      return (int)co;      return (int)co;
3019      }      }
3020    if ((othercase = UCD_OTHERCASE(c)) != c) break;    if ((othercase = UCD_OTHERCASE(c)) != c) break;
3021    }    }
3022    
3023  if (c > d) return -1;  /* Reached end of range */  if (c > d) return -1;  /* Reached end of range */
# Line 3014  switch(ptype) Line 3100  switch(ptype)
3100    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
3101            PRIV(ucp_gentype)[prop->chartype] == ucp_N ||            PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
3102            c == CHAR_UNDERSCORE) == negated;            c == CHAR_UNDERSCORE) == negated;
3103    
3104  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3105    case PT_CLIST:    case PT_CLIST:
3106    p = PRIV(ucd_caseless_sets) + prop->caseset;    p = PRIV(ucd_caseless_sets) + prop->caseset;
3107    for (;;)    for (;;)
3108      {      {
3109      if ((unsigned int)c < *p) return !negated;      if (c < *p) return !negated;
3110      if ((unsigned int)c == *p++) return negated;      if (c == *p++) return negated;
3111      }      }
3112    break;  /* Control never reaches here */    break;  /* Control never reaches here */
3113  #endif  #endif
# Line 3070  if ((options & PCRE_EXTENDED) != 0) Line 3156  if ((options & PCRE_EXTENDED) != 0)
3156      if (*ptr == CHAR_NUMBER_SIGN)      if (*ptr == CHAR_NUMBER_SIGN)
3157        {        {
3158        ptr++;        ptr++;
3159        while (*ptr != 0)        while (*ptr != CHAR_NULL)
3160          {          {
3161          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
3162          ptr++;          ptr++;
# Line 3089  value is a character, a negative value i Line 3175  value is a character, a negative value i
3175  if (*ptr == CHAR_BACKSLASH)  if (*ptr == CHAR_BACKSLASH)
3176    {    {
3177    int temperrorcode = 0;    int temperrorcode = 0;
3178    escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE);    escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options,
3179        FALSE);
3180    if (temperrorcode != 0) return FALSE;    if (temperrorcode != 0) return FALSE;
3181    ptr++;    /* Point after the escape sequence */    ptr++;    /* Point after the escape sequence */
3182    }    }
# Line 3113  if ((options & PCRE_EXTENDED) != 0) Line 3200  if ((options & PCRE_EXTENDED) != 0)
3200      if (*ptr == CHAR_NUMBER_SIGN)      if (*ptr == CHAR_NUMBER_SIGN)
3201        {        {
3202        ptr++;        ptr++;
3203        while (*ptr != 0)        while (*ptr != CHAR_NULL)
3204          {          {
3205          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
3206          ptr++;          ptr++;
# Line 3134  if (*ptr == CHAR_ASTERISK || *ptr == CHA Line 3221  if (*ptr == CHAR_ASTERISK || *ptr == CHA
3221    
3222  /* If the previous item is a character, get its value. */  /* If the previous item is a character, get its value. */
3223    
3224  if (op_code == OP_CHAR || op_code == OP_CHARI ||  if (op_code == OP_CHAR || op_code == OP_CHARI ||
3225      op_code == OP_NOT || op_code == OP_NOTI)      op_code == OP_NOT || op_code == OP_NOTI)
3226    {    {
3227  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 3151  if (escape == 0) Line 3238  if (escape == 0)
3238    {    {
3239    /* For a caseless UTF match, the next character may have more than one other    /* For a caseless UTF match, the next character may have more than one other
3240    case, which maps to the special PT_CLIST property. Check this first. */    case, which maps to the special PT_CLIST property. Check this first. */
3241    
3242  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3243    if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0)    if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0)
3244      {      {
# Line 3167  if (escape == 0) Line 3254  if (escape == 0)
3254    
3255      /* For CHARI (caseless character) we must check the other case. If we have      /* For CHARI (caseless character) we must check the other case. If we have
3256      Unicode property support, we can use it to test the other case of      Unicode property support, we can use it to test the other case of
3257      high-valued characters. We know that next can have only one other case,      high-valued characters. We know that next can have only one other case,
3258      because multi-other-case characters are dealt with above. */      because multi-other-case characters are dealt with above. */
3259    
3260      case OP_CHARI:      case OP_CHARI:
# Line 3187  if (escape == 0) Line 3274  if (escape == 0)
3274      else      else
3275  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3276      return (c != TABLE_GET(next, cd->fcc, next));  /* Not UTF */      return (c != TABLE_GET(next, cd->fcc, next));  /* Not UTF */
3277    
3278      case OP_NOT:      case OP_NOT:
3279      return c == next;      return c == next;
3280    
3281      case OP_NOTI:      case OP_NOTI:
3282      if (c == next) return TRUE;      if (c == next) return TRUE;
3283  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 3234  if (escape == 0) Line 3321  if (escape == 0)
3321      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
3322      switch(next)      switch(next)
3323        {        {
3324        HSPACE_CASES:        HSPACE_CASES:
3325        return op_code == OP_NOT_HSPACE;        return op_code == OP_NOT_HSPACE;
3326    
3327        default:        default:
# Line 3246  if (escape == 0) Line 3333  if (escape == 0)
3333      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
3334      switch(next)      switch(next)
3335        {        {
3336        VSPACE_CASES:        VSPACE_CASES:
3337        return op_code == OP_NOT_VSPACE;        return op_code == OP_NOT_VSPACE;
3338    
3339        default:        default:
# Line 3300  switch(op_code) Line 3387  switch(op_code)
3387      case ESC_H:      case ESC_H:
3388      switch(c)      switch(c)
3389        {        {
3390        HSPACE_CASES:        HSPACE_CASES:
3391        return escape != ESC_h;        return escape != ESC_h;
3392    
3393        default:        default:
3394        return escape == ESC_h;        return escape == ESC_h;
3395        }        }
# Line 3311  switch(op_code) Line 3398  switch(op_code)
3398      case ESC_V:      case ESC_V:
3399      switch(c)      switch(c)
3400        {        {
3401        VSPACE_CASES:        VSPACE_CASES:
3402        return escape != ESC_v;        return escape != ESC_v;
3403    
3404        default:        default:
# Line 3423  switch(op_code) Line 3510  switch(op_code)
3510  *************************************************/  *************************************************/
3511    
3512  /* This function packages up the logic of adding a character or range of  /* This function packages up the logic of adding a character or range of
3513  characters to a class. The character values in the arguments will be within the  characters to a class. The character values in the arguments will be within the
3514  valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is  valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
3515  mutually recursive with the function immediately below.  mutually recursive with the function immediately below.
3516    
3517  Arguments:  Arguments:
3518    classbits     the bit map for characters < 256    classbits     the bit map for characters < 256
3519    uchardptr     points to the pointer for extra data    uchardptr     points to the pointer for extra data
3520    options       the options word    options       the options word
3521    cd            contains pointers to tables etc.    cd            contains pointers to tables etc.
3522    start         start of range character    start         start of range character
3523    end           end of range character    end           end of range character
3524    
3525  Returns:        the number of < 256 characters added  Returns:        the number of < 256 characters added
3526                  the pointer to extra data is updated                  the pointer to extra data is updated
3527  */  */
# Line 3446  add_to_class(pcre_uint8 *classbits, pcre Line 3533  add_to_class(pcre_uint8 *classbits, pcre
3533  pcre_uint32 c;  pcre_uint32 c;
3534  int n8 = 0;  int n8 = 0;
3535    
3536  /* If caseless matching is required, scan the range and process alternate  /* If caseless matching is required, scan the range and process alternate
3537  cases. In Unicode, there are 8-bit characters that have alternate cases that  cases. In Unicode, there are 8-bit characters that have alternate cases that
3538  are greater than 255 and vice-versa. Sometimes we can just extend the original  are greater than 255 and vice-versa. Sometimes we can just extend the original
3539  range. */  range. */
3540    
3541  if ((options & PCRE_CASELESS) != 0)  if ((options & PCRE_CASELESS) != 0)
3542    {    {
3543  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3544    if ((options & PCRE_UTF8) != 0)    if ((options & PCRE_UTF8) != 0)
3545      {      {
3546      int rc;      int rc;
3547      pcre_uint32 oc, od;      pcre_uint32 oc, od;
3548    
3549      options &= ~PCRE_CASELESS;   /* Remove for recursive calls */      options &= ~PCRE_CASELESS;   /* Remove for recursive calls */
3550      c = start;      c = start;
3551    
3552      while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)      while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
3553        {        {
3554        /* Handle a single character that has more than one other case. */        /* Handle a single character that has more than one other case. */
3555    
3556        if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cd,        if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cd,
3557          PRIV(ucd_caseless_sets) + rc, oc);          PRIV(ucd_caseless_sets) + rc, oc);
3558    
3559        /* Do nothing if the other case range is within the original range. */        /* Do nothing if the other case range is within the original range. */
3560    
3561        else if (oc >= start && od <= end) continue;        else if (oc >= start && od <= end) continue;
3562    
3563        /* Extend the original range if there is overlap, noting that if oc < c, we        /* Extend the original range if there is overlap, noting that if oc < c, we
3564        can't have od > end because a subrange is always shorter than the basic        can't have od > end because a subrange is always shorter than the basic
3565        range. Otherwise, use a recursive call to add the additional range. */        range. Otherwise, use a recursive call to add the additional range. */
3566    
3567        else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */        else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
3568        else if (od > end && oc <= end + 1) end = od;       /* Extend upwards */        else if (od > end && oc <= end + 1) end = od;       /* Extend upwards */
3569        else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od);        else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od);
# Line 3486  if ((options & PCRE_CASELESS) != 0) Line 3573  if ((options & PCRE_CASELESS) != 0)
3573  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3574    
3575    /* Not UTF-mode, or no UCP */    /* Not UTF-mode, or no UCP */
3576    
3577    for (c = start; c <= end && c < 256; c++)    for (c = start; c <= end && c < 256; c++)
3578      {      {
3579      SETBIT(classbits, cd->fcc[c]);      SETBIT(classbits, cd->fcc[c]);
3580      n8++;      n8++;
3581      }      }
3582    }    }
3583    
3584  /* Now handle the original range. Adjust the final value according to the bit  /* Now handle the original range. Adjust the final value according to the bit
3585  length - this means that the same lists of (e.g.) horizontal spaces can be used  length - this means that the same lists of (e.g.) horizontal spaces can be used
3586  in all cases. */  in all cases. */
# Line 3519  if (end < 0x100) Line 3606  if (end < 0x100)
3606    {    {
3607    for (c = start; c <= end; c++)    for (c = start; c <= end; c++)
3608      {      {
3609      n8++;      n8++;
3610      SETBIT(classbits, c);      SETBIT(classbits, c);
3611      }      }
3612    }    }
3613    
3614  else  else
3615    {    {
3616    pcre_uchar *uchardata = *uchardptr;    pcre_uchar *uchardata = *uchardptr;
3617    
3618  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3619    if ((options & PCRE_UTF8) != 0)  /* All UTFs use the same flag bit */    if ((options & PCRE_UTF8) != 0)  /* All UTFs use the same flag bit */
3620      {      {
3621      if (start < end)      if (start < end)
3622        {        {
3623        *uchardata++ = XCL_RANGE;        *uchardata++ = XCL_RANGE;
3624        uchardata += PRIV(ord2utf)(start, uchardata);        uchardata += PRIV(ord2utf)(start, uchardata);
3625        uchardata += PRIV(ord2utf)(end, uchardata);        uchardata += PRIV(ord2utf)(end, uchardata);
3626        }        }
3627      else if (start == end)      else if (start == end)
3628        {        {
3629        *uchardata++ = XCL_SINGLE;        *uchardata++ = XCL_SINGLE;
3630        uchardata += PRIV(ord2utf)(start, uchardata);        uchardata += PRIV(ord2utf)(start, uchardata);
3631        }        }
3632      }      }
3633    else    else
3634  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3635    
3636    /* Without UTF support, character values are constrained by the bit length,    /* Without UTF support, character values are constrained by the bit length,
3637    and can only be > 256 for 16-bit and 32-bit libraries. */    and can only be > 256 for 16-bit and 32-bit libraries. */
3638    
3639  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
3640      {}      {}
3641  #else  #else
3642    if (start < end)    if (start < end)
3643      {      {
3644      *uchardata++ = XCL_RANGE;      *uchardata++ = XCL_RANGE;
# Line 3562  else Line 3649  else
3649      {      {
3650      *uchardata++ = XCL_SINGLE;      *uchardata++ = XCL_SINGLE;
3651      *uchardata++ = start;      *uchardata++ = start;
3652      }      }
3653  #endif  #endif
3654    
3655    *uchardptr = uchardata;   /* Update extra data pointer */    *uchardptr = uchardata;   /* Update extra data pointer */
3656    }    }
3657    
3658  return n8;    /* Number of 8-bit characters */  return n8;    /* Number of 8-bit characters */
3659  }  }
3660    
3661    
3662    
3663    
3664  /*************************************************  /*************************************************
3665  *        Add a list of characters to a class     *  *        Add a list of characters to a class     *
3666  *************************************************/  *************************************************/
3667    
3668  /* This function is used for adding a list of case-equivalent characters to a  /* This function is used for adding a list of case-equivalent characters to a
3669  class, and also for adding a list of horizontal or vertical whitespace. If the  class, and also for adding a list of horizontal or vertical whitespace. If the
3670  list is in order (which it should be), ranges of characters are detected and  list is in order (which it should be), ranges of characters are detected and
3671  handled appropriately. This function is mutually recursive with the function  handled appropriately. This function is mutually recursive with the function
# Line 3588  Arguments: Line 3675  Arguments:
3675    classbits     the bit map for characters < 256    classbits     the bit map for characters < 256
3676    uchardptr     points to the pointer for extra data    uchardptr     points to the pointer for extra data
3677    options       the options word    options       the options word
3678    cd            contains pointers to tables etc.    cd            contains pointers to tables etc.
3679    p             points to row of 32-bit values, terminated by NOTACHAR    p             points to row of 32-bit values, terminated by NOTACHAR
3680    except        character to omit; this is used when adding lists of    except        character to omit; this is used when adding lists of
3681                    case-equivalent characters to avoid including the one we                    case-equivalent characters to avoid including the one we
3682                    already know about                    already know about
3683    
3684  Returns:        the number of < 256 characters added  Returns:        the number of < 256 characters added
3685                  the pointer to extra data is updated                  the pointer to extra data is updated
3686  */  */
# Line 3607  while (p[0] < NOTACHAR) Line 3694  while (p[0] < NOTACHAR)
3694    {    {
3695    int n = 0;    int n = 0;
3696    if (p[0] != except)    if (p[0] != except)
3697      {      {
3698      while(p[n+1] == p[0] + n + 1) n++;      while(p[n+1] == p[0] + n + 1) n++;
3699      n8 += add_to_class(classbits, uchardptr, options, cd, p[0], p[n]);      n8 += add_to_class(classbits, uchardptr, options, cd, p[0], p[n]);
3700      }      }
3701    p += n + 1;    p += n + 1;
3702    }    }
3703  return n8;  return n8;
3704  }  }
3705    
3706    
3707    
# Line 3629  Arguments: Line 3716  Arguments:
3716    classbits     the bit map for characters < 256    classbits     the bit map for characters < 256
3717    uchardptr     points to the pointer for extra data    uchardptr     points to the pointer for extra data
3718    options       the options word    options       the options word
3719    cd            contains pointers to tables etc.    cd            contains pointers to tables etc.
3720    p             points to row of 32-bit values, terminated by NOTACHAR    p             points to row of 32-bit values, terminated by NOTACHAR
3721    
3722  Returns:        the number of < 256 characters added  Returns:        the number of < 256 characters added
3723                  the pointer to extra data is updated                  the pointer to extra data is updated
3724  */  */
3725    
3726  static int  static int
3727  add_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr,  add_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr,
3728    int options, compile_data *cd, const pcre_uint32 *p)    int options, compile_data *cd, const pcre_uint32 *p)
3729  {  {
3730  BOOL utf = (options & PCRE_UTF8) != 0;  BOOL utf = (options & PCRE_UTF8) != 0;
# Line 3649  while (p[0] < NOTACHAR) Line 3736  while (p[0] < NOTACHAR)
3736    while (p[1] == p[0] + 1) p++;    while (p[1] == p[0] + 1) p++;
3737    n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1,    n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1,
3738      (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);      (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
3739    p++;    p++;
3740    }    }
3741  return n8;  return n8;
3742  }  }
3743    
3744    
3745    
# Line 3736  BOOL utf = FALSE; Line 3823  BOOL utf = FALSE;
3823  #endif  #endif
3824    
3825  /* Helper variables for OP_XCLASS opcode (for characters > 255). We define  /* Helper variables for OP_XCLASS opcode (for characters > 255). We define
3826  class_uchardata always so that it can be passed to add_to_class() always,  class_uchardata always so that it can be passed to add_to_class() always,
3827  though it will not be used in non-UTF 8-bit cases. This avoids having to supply  though it will not be used in non-UTF 8-bit cases. This avoids having to supply
3828  alternative calls for the different cases. */  alternative calls for the different cases. */
3829    
3830  pcre_uchar *class_uchardata;  pcre_uchar *class_uchardata;
# Line 3796  for (;; ptr++) Line 3883  for (;; ptr++)
3883    pcre_int32 subreqcharflags, subfirstcharflags;    pcre_int32 subreqcharflags, subfirstcharflags;
3884    int terminator;    int terminator;
3885    unsigned int mclength;    unsigned int mclength;
3886    int tempbracount;    unsigned int tempbracount;
3887    pcre_uint32 ec;    pcre_uint32 ec;
3888    pcre_uchar mcbuffer[8];    pcre_uchar mcbuffer[8];
3889    
# Line 3807  for (;; ptr++) Line 3894  for (;; ptr++)
3894    /* If we are at the end of a nested substitution, revert to the outer level    /* If we are at the end of a nested substitution, revert to the outer level
3895    string. Nesting only happens one level deep. */    string. Nesting only happens one level deep. */
3896    
3897    if (c == 0 && nestptr != NULL)    if (c == CHAR_NULL && nestptr != NULL)
3898      {      {
3899      ptr = nestptr;      ptr = nestptr;
3900      nestptr = NULL;      nestptr = NULL;
# Line 3882  for (;; ptr++) Line 3969  for (;; ptr++)
3969    
3970    /* If in \Q...\E, check for the end; if not, we have a literal */    /* If in \Q...\E, check for the end; if not, we have a literal */
3971    
3972    if (inescq && c != 0)    if (inescq && c != CHAR_NULL)
3973      {      {
3974      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
3975        {        {
# Line 3930  for (;; ptr++) Line 4017  for (;; ptr++)
4017      if (c == CHAR_NUMBER_SIGN)      if (c == CHAR_NUMBER_SIGN)
4018        {        {
4019        ptr++;        ptr++;
4020        while (*ptr != 0)        while (*ptr != CHAR_NULL)
4021          {          {
4022          if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
4023          ptr++;          ptr++;
# Line 3938  for (;; ptr++) Line 4025  for (;; ptr++)
4025          if (utf) FORWARDCHAR(ptr);          if (utf) FORWARDCHAR(ptr);
4026  #endif  #endif
4027          }          }
4028        if (*ptr != 0) continue;        if (*ptr != CHAR_NULL) continue;
4029    
4030        /* Else fall through to handle end of string */        /* Else fall through to handle end of string */
4031        c = 0;        c = 0;
# Line 4116  for (;; ptr++) Line 4203  for (;; ptr++)
4203      means that an initial ] is taken as a data character. At the start of the      means that an initial ] is taken as a data character. At the start of the
4204      loop, c contains the first byte of the character. */      loop, c contains the first byte of the character. */
4205    
4206      if (c != 0) do      if (c != CHAR_NULL) do
4207        {        {
4208        const pcre_uchar *oldptr;        const pcre_uchar *oldptr;
4209    
# Line 4131  for (;; ptr++) Line 4218  for (;; ptr++)
4218        /* In the pre-compile phase, accumulate the length of any extra        /* In the pre-compile phase, accumulate the length of any extra
4219        data and reset the pointer. This is so that very large classes that        data and reset the pointer. This is so that very large classes that
4220        contain a zillion > 255 characters no longer overwrite the work space        contain a zillion > 255 characters no longer overwrite the work space
4221        (which is on the stack). We have to remember that there was XCLASS data,        (which is on the stack). We have to remember that there was XCLASS data,
4222        however. */        however. */
4223    
4224        if (lengthptr != NULL && class_uchardata > class_uchardata_base)        if (lengthptr != NULL && class_uchardata > class_uchardata_base)
# Line 4195  for (;; ptr++) Line 4282  for (;; ptr++)
4282          alpha. This relies on the fact that the class table starts with          alpha. This relies on the fact that the class table starts with
4283          alpha, lower, upper as the first 3 entries. */          alpha, lower, upper as the first 3 entries. */
4284    
4285          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
4286            posix_class = 0;            posix_class = 0;
4287    
4288          /* When PCRE_UCP is set, some of the POSIX classes are converted to          /* When PCRE_UCP is set, some of the POSIX classes are converted to
# Line 4272  for (;; ptr++) Line 4359  for (;; ptr++)
4359    
4360        if (c == CHAR_BACKSLASH)        if (c == CHAR_BACKSLASH)
4361          {          {
4362          escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE);          escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
4363              TRUE);
4364          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
4365            if (escape == 0) c = ec;
         if (escape == 0)  
           c = ec;  
4366          else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */          else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
4367          else if (escape == ESC_N)            /* \N is not supported in a class */          else if (escape == ESC_N)          /* \N is not supported in a class */
4368            {            {
4369            *errorcodeptr = ERR71;            *errorcodeptr = ERR71;
4370            goto FAILED;            goto FAILED;
# Line 4351  for (;; ptr++) Line 4436  for (;; ptr++)
4436              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
4437              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
4438              continue;              continue;
4439    
4440              /* The rest apply in both UCP and non-UCP cases. */              /* The rest apply in both UCP and non-UCP cases. */
4441    
4442              case ESC_h:              case ESC_h:
4443              (void)add_list_to_class(classbits, &class_uchardata, options, cd,              (void)add_list_to_class(classbits, &class_uchardata, options, cd,
4444                PRIV(hspace_list), NOTACHAR);                PRIV(hspace_list), NOTACHAR);
4445              continue;              continue;
4446    
4447              case ESC_H:              case ESC_H:
4448              (void)add_not_list_to_class(classbits, &class_uchardata, options,              (void)add_not_list_to_class(classbits, &class_uchardata, options,
4449                cd, PRIV(hspace_list));                cd, PRIV(hspace_list));
4450              continue;              continue;
4451    
4452              case ESC_v:              case ESC_v:
4453              (void)add_list_to_class(classbits, &class_uchardata, options, cd,              (void)add_list_to_class(classbits, &class_uchardata, options, cd,
4454                PRIV(vspace_list), NOTACHAR);                PRIV(vspace_list), NOTACHAR);
4455              continue;              continue;
4456    
4457              case ESC_V:              case ESC_V:
4458              (void)add_not_list_to_class(classbits, &class_uchardata, options,              (void)add_not_list_to_class(classbits, &class_uchardata, options,
4459                cd, PRIV(vspace_list));                cd, PRIV(vspace_list));
4460              continue;              continue;
4461    
4462  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 4409  for (;; ptr++) Line 4494  for (;; ptr++)
4494    
4495          /* Fall through if the escape just defined a single character (c >= 0).          /* Fall through if the escape just defined a single character (c >= 0).
4496          This may be greater than 256. */          This may be greater than 256. */
4497    
4498          escape = 0;          escape = 0;
4499    
4500          }   /* End of backslash handling */          }   /* End of backslash handling */
# Line 4450  for (;; ptr++) Line 4535  for (;; ptr++)
4535            inescq = TRUE;            inescq = TRUE;
4536            break;            break;
4537            }            }
4538    
4539          /* Minus (hyphen) at the end of a class is treated as a literal, so put          /* Minus (hyphen) at the end of a class is treated as a literal, so put
4540          back the pointer and jump to handle the character that preceded it. */          back the pointer and jump to handle the character that preceded it. */
4541    
4542          if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))          if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
4543            {            {
4544            ptr = oldptr;            ptr = oldptr;
4545            goto CLASS_SINGLE_CHARACTER;            goto CLASS_SINGLE_CHARACTER;
4546            }            }
4547    
4548          /* Otherwise, we have a potential range; pick up the next character */          /* Otherwise, we have a potential range; pick up the next character */
4549    
4550  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 4506  for (;; ptr++) Line 4591  for (;; ptr++)
4591          /* We have found a character range, so single character optimizations          /* We have found a character range, so single character optimizations
4592          cannot be done anymore. Any value greater than 1 indicates that there          cannot be done anymore. Any value greater than 1 indicates that there
4593          is more than one character. */          is more than one character. */
4594    
4595          class_one_char = 2;          class_one_char = 2;
4596    
4597          /* Remember an explicit \r or \n, and add the range to the class. */          /* Remember an explicit \r or \n, and add the range to the class. */
4598    
4599          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
4600    
4601          class_has_8bitchar +=          class_has_8bitchar +=
4602            add_to_class(classbits, &class_uchardata, options, cd, c, d);            add_to_class(classbits, &class_uchardata, options, cd, c, d);
4603    
4604          continue;   /* Go get the next char in the class */          continue;   /* Go get the next char in the class */
4605          }          }
4606    
4607        /* Handle a single character - we can get here for a normal non-escape        /* Handle a single character - we can get here for a normal non-escape
4608        char, or after \ that introduces a single character or for an apparent        char, or after \ that introduces a single character or for an apparent
4609        range that isn't. Only the value 1 matters for class_one_char, so don't        range that isn't. Only the value 1 matters for class_one_char, so don't
4610        increase it if it is already 2 or more ... just in case there's a class        increase it if it is already 2 or more ... just in case there's a class
4611        with a zillion characters in it. */        with a zillion characters in it. */
4612    
4613        CLASS_SINGLE_CHARACTER:        CLASS_SINGLE_CHARACTER:
# Line 4545  for (;; ptr++) Line 4630  for (;; ptr++)
4630    
4631          if (negate_class)          if (negate_class)
4632            {            {
4633  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4634            int d;            int d;
4635  #endif  #endif
4636            if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;            if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
4637            zerofirstchar = firstchar;            zerofirstchar = firstchar;
4638            zerofirstcharflags = firstcharflags;            zerofirstcharflags = firstcharflags;
# Line 4555  for (;; ptr++) Line 4640  for (;; ptr++)
4640            /* For caseless UTF-8 mode when UCP support is available, check            /* For caseless UTF-8 mode when UCP support is available, check
4641            whether this character has more than one other case. If so, generate            whether this character has more than one other case. If so, generate
4642            a special OP_NOTPROP item instead of OP_NOTI. */            a special OP_NOTPROP item instead of OP_NOTI. */
4643    
4644  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4645            if (utf && (options & PCRE_CASELESS) != 0 &&            if (utf && (options & PCRE_CASELESS) != 0 &&
4646                (d = UCD_CASESET(c)) != 0)                (d = UCD_CASESET(c)) != 0)
4647              {              {
4648              *code++ = OP_NOTPROP;              *code++ = OP_NOTPROP;
4649              *code++ = PT_CLIST;              *code++ = PT_CLIST;
4650              *code++ = d;              *code++ = d;
4651              }              }
4652            else            else
4653  #endif  #endif
4654            /* Char has only one other case, or UCP not available */            /* Char has only one other case, or UCP not available */
4655    
# Line 4577  for (;; ptr++) Line 4662  for (;; ptr++)
4662  #endif  #endif
4663                *code++ = c;                *code++ = c;
4664              }              }
4665    
4666            /* We are finished with this character class */            /* We are finished with this character class */
4667    
4668            goto END_CLASS;            goto END_CLASS;
4669            }            }
4670    
# Line 4597  for (;; ptr++) Line 4682  for (;; ptr++)
4682            }            }
4683          goto ONE_CHAR;          goto ONE_CHAR;
4684          }       /* End of 1-char optimization */          }       /* End of 1-char optimization */
4685    
4686        /* There is more than one character in the class, or an XCLASS item        /* There is more than one character in the class, or an XCLASS item
4687        has been generated. Add this character to the class. */        has been generated. Add this character to the class. */
4688    
4689        class_has_8bitchar +=        class_has_8bitchar +=
4690          add_to_class(classbits, &class_uchardata, options, cd, c, c);          add_to_class(classbits, &class_uchardata, options, cd, c, c);
4691        }        }
4692    
# Line 4609  for (;; ptr++) Line 4694  for (;; ptr++)
4694      If we are at the end of an internal nested string, revert to the outer      If we are at the end of an internal nested string, revert to the outer
4695      string. */      string. */
4696    
4697      while (((c = *(++ptr)) != 0 ||      while (((c = *(++ptr)) != CHAR_NULL ||
4698             (nestptr != NULL &&             (nestptr != NULL &&
4699               (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != 0)) &&               (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != CHAR_NULL)) &&
4700             (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));             (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
4701    
4702      /* Check for missing terminating ']' */      /* Check for missing terminating ']' */
4703    
4704      if (c == 0)      if (c == CHAR_NULL)
4705        {        {
4706        *errorcodeptr = ERR6;        *errorcodeptr = ERR6;
4707        goto FAILED;        goto FAILED;
4708        }        }
4709    
4710      /* We will need an XCLASS if data has been placed in class_uchardata. In      /* We will need an XCLASS if data has been placed in class_uchardata. In
4711      the second phase this is a sufficient test. However, in the pre-compile      the second phase this is a sufficient test. However, in the pre-compile
4712      phase, class_uchardata gets emptied to prevent workspace overflow, so      phase, class_uchardata gets emptied to prevent workspace overflow, so
4713      only if the very last character in the class needs XCLASS will it contain      only if the very last character in the class needs XCLASS will it contain
4714      anything at this point. For this reason, xclass gets set TRUE above when      anything at this point. For this reason, xclass gets set TRUE above when
4715      class_uchardata is emptied, and that's why this code is the way it is here      class_uchardata is emptied, and that's why this code is the way it is here
4716      instead of just doing a test on class_uchardata below. */      instead of just doing a test on class_uchardata below. */
4717    
4718  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4719      if (class_uchardata > class_uchardata_base) xclass = TRUE;      if (class_uchardata > class_uchardata_base) xclass = TRUE;
4720  #endif  #endif
# Line 4698  for (;; ptr++) Line 4783  for (;; ptr++)
4783        memcpy(code, classbits, 32);        memcpy(code, classbits, 32);
4784        }        }
4785      code += 32 / sizeof(pcre_uchar);      code += 32 / sizeof(pcre_uchar);
4786    
4787      END_CLASS:      END_CLASS:
4788      break;      break;
4789    
# Line 4902  for (;; ptr++) Line 4987  for (;; ptr++)
4987    
4988        if (repeat_max == 0) goto END_REPEAT;        if (repeat_max == 0) goto END_REPEAT;
4989    
       /*--------------------------------------------------------------------*/  
       /* This code is obsolete from release 8.00; the restriction was finally  
       removed: */  
   
       /* All real repeats make it impossible to handle partial matching (maybe  
       one day we will be able to remove this restriction). */  
   
       /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */  
       /*--------------------------------------------------------------------*/  
   
4990        /* Combine the op_type with the repeat_type */        /* Combine the op_type with the repeat_type */
4991    
4992        repeat_type += op_type;        repeat_type += op_type;
# Line 5058  for (;; ptr++) Line 5133  for (;; ptr++)
5133          goto END_REPEAT;          goto END_REPEAT;
5134          }          }
5135    
       /*--------------------------------------------------------------------*/  
       /* This code is obsolete from release 8.00; the restriction was finally  
       removed: */  
   
       /* All real repeats make it impossible to handle partial matching (maybe  
       one day we will be able to remove this restriction). */  
   
       /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */  
       /*--------------------------------------------------------------------*/  
   
5136        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
5137          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
5138        else if (repeat_min == 1 && repeat_max == -1)        else if (repeat_min == 1 && repeat_max == -1)
# Line 5410  for (;; ptr++) Line 5475  for (;; ptr++)
5475              pcre_uchar *scode = bracode;              pcre_uchar *scode = bracode;
5476              do              do
5477                {                {
5478                if (could_be_empty_branch(scode, ketcode, utf, cd))                if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
5479                  {                  {
5480                  *bracode += OP_SBRA - OP_BRA;                  *bracode += OP_SBRA - OP_BRA;
5481                  break;                  break;
# Line 5606  for (;; ptr++) Line 5671  for (;; ptr++)
5671        if (*ptr == CHAR_COLON)        if (*ptr == CHAR_COLON)
5672          {          {
5673          arg = ++ptr;          arg = ++ptr;
5674          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;          while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
5675          arglen = (int)(ptr - arg);          arglen = (int)(ptr - arg);
5676          if ((unsigned int)arglen > MAX_MARK)          if ((unsigned int)arglen > MAX_MARK)
5677            {            {
# Line 5720  for (;; ptr++) Line 5785  for (;; ptr++)
5785          {          {
5786          case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */          case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */
5787          ptr++;          ptr++;
5788          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;          while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
5789          if (*ptr == 0)          if (*ptr == CHAR_NULL)
5790            {            {
5791            *errorcodeptr = ERR18;            *errorcodeptr = ERR18;
5792            goto FAILED;            goto FAILED;
# Line 5744  for (;; ptr++) Line 5809  for (;; ptr++)
5809          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5810          case CHAR_LEFT_PARENTHESIS:          case CHAR_LEFT_PARENTHESIS:
5811          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
5812            tempptr = ptr;
5813    
5814          /* A condition can be an assertion, a number (referring to a numbered          /* A condition can be an assertion, a number (referring to a numbered
5815          group), a name (referring to a named group), or 'R', referring to          group), a name (referring to a named group), or 'R', referring to
# Line 5756  for (;; ptr++) Line 5822  for (;; ptr++)
5822          be the recursive thing or the name 'R' (and similarly for 'R' followed          be the recursive thing or the name 'R' (and similarly for 'R' followed
5823          by digits), and (b) a number could be a name that consists of digits.          by digits), and (b) a number could be a name that consists of digits.
5824          In both cases, we look for a name first; if not found, we try the other          In both cases, we look for a name first; if not found, we try the other
5825          cases. */          cases.
5826    
5827            For compatibility with auto-callouts, we allow a callout to be
5828            specified before a condition that is an assertion. First, check for the
5829            syntax of a callout; if found, adjust the temporary pointer that is
5830            used to check for an assertion condition. That's all that is needed! */
5831    
5832            if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
5833              {
5834              for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
5835              if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
5836                tempptr += i + 1;
5837              }
5838    
5839          /* For conditions that are assertions, check the syntax, and then exit          /* For conditions that are assertions, check the syntax, and then exit
5840          the switch. This will take control down to where bracketed groups,          the switch. This will take control down to where bracketed groups,
5841          including assertions, are processed. */          including assertions, are processed. */
5842    
5843          if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||          if (tempptr[1] == CHAR_QUESTION_MARK &&
5844              ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))                (tempptr[2] == CHAR_EQUALS_SIGN ||
5845                   tempptr[2] == CHAR_EXCLAMATION_MARK ||
5846                   tempptr[2] == CHAR_LESS_THAN_SIGN))
5847            break;            break;
5848    
5849          /* Most other conditions use OP_CREF (a couple change to OP_RREF          /* Most other conditions use OP_CREF (a couple change to OP_RREF
# Line 5797  for (;; ptr++) Line 5877  for (;; ptr++)
5877            }            }
5878          else          else
5879            {            {
5880            terminator = 0;            terminator = CHAR_NULL;
5881            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
5882            }            }
5883    
# Line 5887  for (;; ptr++) Line 5967  for (;; ptr++)
5967            code[1+LINK_SIZE]++;            code[1+LINK_SIZE]++;
5968            }            }
5969    
5970          /* If terminator == 0 it means that the name followed directly after          /* If terminator == CHAR_NULL it means that the name followed directly
5971          the opening parenthesis [e.g. (?(abc)...] and in this case there are          after the opening parenthesis [e.g. (?(abc)...] and in this case there
5972          some further alternatives to try. For the cases where terminator != 0          are some further alternatives to try. For the cases where terminator !=
5973          [things like (?(<name>... or (?('name')... or (?(R&name)... ] we have          0 [things like (?(<name>... or (?('name')... or (?(R&name)... ] we have
5974          now checked all the possibilities, so give an error. */          now checked all the possibilities, so give an error. */
5975    
5976          else if (terminator != 0)          else if (terminator != CHAR_NULL)
5977            {            {
5978            *errorcodeptr = ERR15;            *errorcodeptr = ERR15;
5979            goto FAILED;            goto FAILED;
# Line 6698  for (;; ptr++) Line 6778  for (;; ptr++)
6778        /* If the subpattern set a required byte (or set a first byte that isn't        /* If the subpattern set a required byte (or set a first byte that isn't
6779        really the first byte - see above), set it. */        really the first byte - see above), set it. */
6780    
6781        if (subreqcharflags >= 0)        if (subreqcharflags >= 0)
6782          {          {
6783          reqchar = subreqchar;          reqchar = subreqchar;
6784          reqcharflags = subreqcharflags;          reqcharflags = subreqcharflags;
# Line 6733  for (;; ptr++) Line 6813  for (;; ptr++)
6813      case CHAR_BACKSLASH:      case CHAR_BACKSLASH:
6814      tempptr = ptr;      tempptr = ptr;
6815      escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);      escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);
   
6816      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
6817    
6818      if (escape == 0)      if (escape == 0)                  /* The escape coded a single character */
6819        c = ec;        c = ec;
6820      else      else
6821        {        {
# Line 6790  for (;; ptr++) Line 6869  for (;; ptr++)
6869          if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)          if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
6870            {            {
6871            BOOL is_a_number = TRUE;            BOOL is_a_number = TRUE;
6872            for (p = ptr + 1; *p != 0 && *p != (pcre_uchar)terminator; p++)            for (p = ptr + 1; *p != CHAR_NULL && *p != (pcre_uchar)terminator; p++)
6873              {              {
6874              if (!MAX_255(*p)) { is_a_number = FALSE; break; }              if (!MAX_255(*p)) { is_a_number = FALSE; break; }
6875              if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE;              if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE;
# Line 6902  for (;; ptr++) Line 6981  for (;; ptr++)
6981        can obtain the OP value by negating the escape value in the default        can obtain the OP value by negating the escape value in the default
6982        situation when PCRE_UCP is not set. When it *is* set, we substitute        situation when PCRE_UCP is not set. When it *is* set, we substitute
6983        Unicode property tests. Note that \b and \B do a one-character        Unicode property tests. Note that \b and \B do a one-character
6984        lookbehind. */        lookbehind, and \A also behaves as if it does. */
6985    
6986        else        else
6987          {          {
6988          if ((escape == ESC_b || escape == ESC_B) && cd->max_lookbehind == 0)          if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
6989                 cd->max_lookbehind == 0)
6990            cd->max_lookbehind = 1;            cd->max_lookbehind = 1;
6991  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6992          if (escape >= ESC_DU && escape <= ESC_wu)          if (escape >= ESC_DU && escape <= ESC_wu)
# Line 6964  for (;; ptr++) Line 7044  for (;; ptr++)
7044    
7045      ONE_CHAR:      ONE_CHAR:
7046      previous = code;      previous = code;
7047    
7048      /* For caseless UTF-8 mode when UCP support is available, check whether      /* For caseless UTF-8 mode when UCP support is available, check whether
7049      this character has more than one other case. If so, generate a special      this character has more than one other case. If so, generate a special
7050      OP_PROP item instead of OP_CHARI. */      OP_PROP item instead of OP_CHARI. */
7051    
7052  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
7053      if (utf && (options & PCRE_CASELESS) != 0)      if (utf && (options & PCRE_CASELESS) != 0)
7054        {        {
# Line 6978  for (;; ptr++) Line 7058  for (;; ptr++)
7058          *code++ = OP_PROP;          *code++ = OP_PROP;
7059          *code++ = PT_CLIST;          *code++ = PT_CLIST;
7060          *code++ = c;          *code++ = c;
7061          if (firstcharflags == REQ_UNSET) firstcharflags = zerofirstcharflags = REQ_NONE;          if (firstcharflags == REQ_UNSET) firstcharflags = zerofirstcharflags = REQ_NONE;
7062          break;          break;
7063          }          }
7064        }        }
7065  #endif  #endif
7066    
7067      /* Caseful matches, or not one of the multicase characters. */      /* Caseful matches, or not one of the multicase characters. */
7068    
7069      *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR;      *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR;
7070      for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];      for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
7071    
# Line 7107  pcre_int32 firstcharflags, reqcharflags; Line 7187  pcre_int32 firstcharflags, reqcharflags;
7187  pcre_uint32 branchfirstchar, branchreqchar;  pcre_uint32 branchfirstchar, branchreqchar;
7188  pcre_int32 branchfirstcharflags, branchreqcharflags;  pcre_int32 branchfirstcharflags, branchreqcharflags;
7189  int length;  int length;
7190  int orig_bracount;  unsigned int orig_bracount;
7191  int max_bracount;  unsigned int max_bracount;
7192  branch_chain bc;  branch_chain bc;
7193    
7194  bc.outer = bcptr;  bc.outer = bcptr;
# Line 7175  for (;;) Line 7255  for (;;)
7255    into the length. */    into the length. */
7256    
7257    if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,    if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,
7258          &branchfirstcharflags, &branchreqchar, &branchreqcharflags, &bc,          &branchfirstcharflags, &branchreqchar, &branchreqcharflags, &bc,
7259          cond_depth, cd, (lengthptr == NULL)? NULL : &length))          cond_depth, cd, (lengthptr == NULL)? NULL : &length))
7260      {      {
7261      *ptrptr = ptr;      *ptrptr = ptr;
# Line 7213  for (;;) Line 7293  for (;;)
7293        we have to abandon the firstchar for the regex, but if there was        we have to abandon the firstchar for the regex, but if there was
7294        previously no reqchar, it takes on the value of the old firstchar. */        previously no reqchar, it takes on the value of the old firstchar. */
7295    
7296        if (firstcharflags >= 0 &&        if (firstcharflags >= 0 &&
7297            (firstcharflags != branchfirstcharflags || firstchar != branchfirstchar))            (firstcharflags != branchfirstcharflags || firstchar != branchfirstchar))
7298          {          {
7299          if (reqcharflags < 0)          if (reqcharflags < 0)
7300            {            {
7301            reqchar = firstchar;            reqchar = firstchar;
7302            reqcharflags = firstcharflags;            reqcharflags = firstcharflags;
# Line 7575  do { Line 7655  do {
7655       {       {
7656       if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;       if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
7657       }       }
7658    
7659     /* Atomic brackets */     /* Atomic brackets */
7660    
7661     else if (op == OP_ONCE || op == OP_ONCE_NC)     else if (op == OP_ONCE || op == OP_ONCE_NC)
# Line 7770  pcre32_compile2(PCRE_SPTR32 pattern, int Line 7850  pcre32_compile2(PCRE_SPTR32 pattern, int
7850  {  {
7851  REAL_PCRE *re;  REAL_PCRE *re;
7852  int length = 1;  /* For final END opcode */  int length = 1;  /* For final END opcode */
 pcre_uint32 firstchar, reqchar;  
7853  pcre_int32 firstcharflags, reqcharflags;  pcre_int32 firstcharflags, reqcharflags;
7854    pcre_uint32 firstchar, reqchar;
7855    pcre_uint32 limit_match = PCRE_UINT32_MAX;
7856    pcre_uint32 limit_recursion = PCRE_UINT32_MAX;
7857  int newline;  int newline;
7858  int errorcode = 0;  int errorcode = 0;
7859  int skipatstart = 0;  int skipatstart = 0;
7860  BOOL utf;  BOOL utf;
7861    BOOL never_utf = FALSE;
7862  size_t size;  size_t size;
7863  pcre_uchar *code;  pcre_uchar *code;
7864  const pcre_uchar *codestart;  const pcre_uchar *codestart;
# Line 7835  if ((options & ~PUBLIC_COMPILE_OPTIONS) Line 7918  if ((options & ~PUBLIC_COMPILE_OPTIONS)
7918    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN;
7919    }    }
7920    
7921    /* If PCRE_NEVER_UTF is set, remember it. */
7922    
7923    if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE;
7924    
7925  /* Check for global one-time settings at the start of the pattern, and remember  /* Check for global one-time settings at the start of the pattern, and remember
7926  the offset for later. */  the offset for later. */
7927    
7928    cd->external_flags = 0;   /* Initialize here for LIMIT_MATCH/RECURSION */
7929    
7930  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
7931         ptr[skipatstart+1] == CHAR_ASTERISK)         ptr[skipatstart+1] == CHAR_ASTERISK)
7932    {    {
7933    int newnl = 0;    int newnl = 0;
7934    int newbsr = 0;    int newbsr = 0;
7935    
7936    /* For completeness and backward compatibility, (*UTFn) is supported in the
7937    relevant libraries, but (*UTF) is generic and always supported. Note that
7938    PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
7939    
7940  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
7941    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)
7942      { skipatstart += 7; options |= PCRE_UTF8; continue; }      { skipatstart += 7; options |= PCRE_UTF8; continue; }
7943  #endif  #endif
7944  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
7945    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF16_RIGHTPAR, 6) == 0)
7946      { skipatstart += 8; options |= PCRE_UTF16; continue; }      { skipatstart += 8; options |= PCRE_UTF16; continue; }
7947  #endif  #endif
7948  #ifdef COMPILE_PCRE32  #ifdef COMPILE_PCRE32
7949    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF32_RIGHTPAR, 6) == 0)
7950      { skipatstart += 8; options |= PCRE_UTF32; continue; }      { skipatstart += 8; options |= PCRE_UTF32; continue; }
7951  #endif  #endif
7952    
7953      else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 4) == 0)
7954        { skipatstart += 6; options |= PCRE_UTF8; continue; }
7955    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
7956      { skipatstart += 6; options |= PCRE_UCP; continue; }      { skipatstart += 6; options |= PCRE_UCP; continue; }
7957    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
7958      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
7959    
7960      else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0)
7961        {
7962        pcre_uint32 c = 0;
7963        int p = skipatstart + 14;
7964        while (isdigit(ptr[p]))
7965          {
7966          if (c > PCRE_UINT32_MAX / 10 - 1) break;   /* Integer overflow */
7967          c = c*10 + ptr[p++] - CHAR_0;
7968          }
7969        if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
7970        if (c < limit_match)
7971          {
7972          limit_match = c;
7973          cd->external_flags |= PCRE_MLSET;
7974          }
7975        skipatstart = p;
7976        continue;
7977        }
7978    
7979      else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0)
7980        {
7981        pcre_uint32 c = 0;
7982        int p = skipatstart + 18;
7983        while (isdigit(ptr[p]))
7984          {
7985          if (c > PCRE_UINT32_MAX / 10 - 1) break;   /* Integer overflow check */
7986          c = c*10 + ptr[p++] - CHAR_0;
7987          }
7988        if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
7989        if (c < limit_recursion)
7990          {
7991          limit_recursion = c;
7992          cd->external_flags |= PCRE_RLSET;
7993          }
7994        skipatstart = p;
7995        continue;
7996        }
7997    
7998    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
7999      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
8000    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)
# Line 7886  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 8020  while (ptr[skipatstart] == CHAR_LEFT_PAR
8020    
8021  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
8022  utf = (options & PCRE_UTF8) != 0;  utf = (options & PCRE_UTF8) != 0;
8023    if (utf && never_utf)
8024      {
8025      errorcode = ERR78;
8026      goto PCRE_EARLY_ERROR_RETURN2;
8027      }
8028    
8029  /* Can't support UTF unless PCRE has been compiled to include the code. The  /* Can't support UTF unless PCRE has been compiled to include the code. The
8030  return of an error code from PRIV(valid_utf)() is a new feature, introduced in  return of an error code from PRIV(valid_utf)() is a new feature, introduced in
# Line 8008  cd->req_varyopt = 0; Line 8147  cd->req_varyopt = 0;
8147  cd->assert_depth = 0;  cd->assert_depth = 0;
8148  cd->max_lookbehind = 0;  cd->max_lookbehind = 0;
8149  cd->external_options = options;  cd->external_options = options;
 cd->external_flags = 0;  
8150  cd->open_caps = NULL;  cd->open_caps = NULL;
8151    
8152  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
# Line 8058  re->magic_number = MAGIC_NUMBER; Line 8196  re->magic_number = MAGIC_NUMBER;
8196  re->size = (int)size;  re->size = (int)size;
8197  re->options = cd->external_options;  re->options = cd->external_options;
8198  re->flags = cd->external_flags;  re->flags = cd->external_flags;
8199    re->limit_match = limit_match;
8200    re->limit_recursion = limit_recursion;
8201  re->first_char = 0;  re->first_char = 0;
8202  re->req_char = 0;  re->req_char = 0;
8203  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
# Line 8067  re->ref_count = 0; Line 8207  re->ref_count = 0;
8207  re->tables = (tables == PRIV(default_tables))? NULL : tables;  re->tables = (tables == PRIV(default_tables))? NULL : tables;
8208  re->nullpad = NULL;  re->nullpad = NULL;
8209  #ifdef COMPILE_PCRE32  #ifdef COMPILE_PCRE32
8210  re->dummy1 = re->dummy2 = 0;  re->dummy = 0;
8211    #else
8212    re->dummy1 = re->dummy2 = re->dummy3 = 0;
8213  #endif  #endif
8214    
8215  /* The starting points of the name/number translation table and of the code are  /* The starting points of the name/number translation table and of the code are
# Line 8114  if (cd->had_accept) Line 8256  if (cd->had_accept)
8256    
8257  /* If not reached end of pattern on success, there's an excess bracket. */  /* If not reached end of pattern on success, there's an excess bracket. */
8258    
8259  if (errorcode == 0 && *ptr != 0) errorcode = ERR22;  if (errorcode == 0 && *ptr != CHAR_NULL) errorcode = ERR22;
8260    
8261  /* Fill in the terminating state and check for disastrous overflow, but  /* Fill in the terminating state and check for disastrous overflow, but
8262  if debugging, leave the test till after things are printed out. */  if debugging, leave the test till after things are printed out. */
# Line 8125  if debugging, leave the test till after Line 8267  if debugging, leave the test till after
8267  if (code - codestart > length) errorcode = ERR23;  if (code - codestart > length) errorcode = ERR23;
8268  #endif  #endif
8269    
8270    #ifdef SUPPORT_VALGRIND
8271    /* If the estimated length exceeds the really used length, mark the extra
8272    allocated memory as unaddressable, so that any out-of-bound reads can be
8273    detected. */
8274    VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar));
8275    #endif
8276    
8277  /* Fill in any forward references that are required. There may be repeated  /* Fill in any forward references that are required. There may be repeated
8278  references; optimize for them, as searching a large regex takes time. */  references; optimize for them, as searching a large regex takes time. */
8279    
# Line 8148  if (cd->hwm > cd->start_workspace) Line 8297  if (cd->hwm > cd->start_workspace)
8297      }      }
8298    }    }
8299    
8300  /* If the workspace had to be expanded, free the new memory. */  /* If the workspace had to be expanded, free the new memory. Set the pointer to
8301    NULL to indicate that forward references have been filled in. */
8302    
8303  if (cd->workspace_size > COMPILE_WORK_SIZE)  if (cd->workspace_size > COMPILE_WORK_SIZE)
8304    (PUBL(free))((void *)cd->start_workspace);    (PUBL(free))((void *)cd->start_workspace);
8305    cd->start_workspace = NULL;
8306    
8307  /* Give an error if there's back reference to a non-existent capturing  /* Give an error if there's back reference to a non-existent capturing
8308  subpattern. */  subpattern. */
# Line 8355  if (code - codestart > length) Line 8506  if (code - codestart > length)
8506    }    }
8507  #endif   /* PCRE_DEBUG */  #endif   /* PCRE_DEBUG */
8508    
8509    /* Check for a pattern that can match an empty string, so that this information
8510    can be provided to applications. */
8511    
8512    do
8513      {
8514      if (could_be_empty_branch(codestart, code, utf, cd, NULL))
8515        {
8516        re->flags |= PCRE_MATCH_EMPTY;
8517        break;
8518        }
8519      codestart += GET(codestart, 1);
8520      }
8521    while (*codestart == OP_ALT);
8522    
8523  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
8524  return (pcre *)re;  return (pcre *)re;
8525  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16

Legend:
Removed from v.1133  
changed lines
  Added in v.1348

  ViewVC Help
Powered by ViewVC 1.1.5