/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 994 by ph10, Tue Jul 10 14:29:26 2012 UTC revision 1060 by chpe, Tue Oct 16 15:53:57 2012 UTC
# Line 53  supporting internal functions that are n Line 53  supporting internal functions that are n
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    
56  /* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which  /* When PCRE_DEBUG is defined, we need the pcre(16|32)_printint() function, which
57  is also used by pcretest. PCRE_DEBUG is not defined when building a production  is also used by pcretest. PCRE_DEBUG is not defined when building a production
58  library. We do not need to select pcre16_printint.c specially, because the  library. We do not need to select pcre16_printint.c specially, because the
59  COMPILE_PCREx macro will already be appropriately set. */  COMPILE_PCREx macro will already be appropriately set. */
# Line 68  COMPILE_PCREx macro will already be appr Line 68  COMPILE_PCREx macro will already be appr
68    
69  /* Macro for setting individual bits in class bitmaps. */  /* Macro for setting individual bits in class bitmaps. */
70    
71  #define SETBIT(a,b) a[b/8] |= (1 << (b%8))  #define SETBIT(a,b) a[(b)/8] |= (1 << ((b)&7))
72    
73  /* Maximum length value to check against when making sure that the integer that  /* Maximum length value to check against when making sure that the integer that
74  holds the compiled pattern length does not overflow. We make it a bit less than  holds the compiled pattern length does not overflow. We make it a bit less than
# Line 77  to check them every time. */ Line 77  to check them every time. */
77    
78  #define OFLOW_MAX (INT_MAX - 20)  #define OFLOW_MAX (INT_MAX - 20)
79    
80    /* Definitions to allow mutual recursion */
81    
82    static int
83      add_list_to_class(pcre_uint8 *, pcre_uchar **, int, compile_data *,
84        const pcre_uint32 *, unsigned int);
85    
86    static BOOL
87      compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL,
88        int, int, int *, int *, branch_chain *, compile_data *, int *);
89    
90    
91    
92  /*************************************************  /*************************************************
93  *      Code parameters and static tables         *  *      Code parameters and static tables         *
# Line 112  overrun before it actually does run off Line 123  overrun before it actually does run off
123    
124  #define REQ_CASELESS   0x10000000l      /* Indicates caselessness */  #define REQ_CASELESS   0x10000000l      /* Indicates caselessness */
125  #define REQ_VARY       0x20000000l      /* Reqchar followed non-literal item */  #define REQ_VARY       0x20000000l      /* Reqchar followed non-literal item */
126    #define REQ_MASK       (REQ_CASELESS | REQ_VARY)
127    
128  /* Repeated character flags. */  /* Repeated character flags. */
129    
# Line 492  static const char error_texts[] = Line 504  static const char error_texts[] =
504    /* 75 */    /* 75 */
505    "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"    "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
506    "character value in \\u.... sequence is too large\0"    "character value in \\u.... sequence is too large\0"
507      "invalid UTF-32 string\0"
508    ;    ;
509    
510  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 631  static const pcre_uint8 ebcdic_chartab[] Line 644  static const pcre_uint8 ebcdic_chartab[]
644  #endif  #endif
645    
646    
 /* Definition to allow mutual recursion */  
   
 static BOOL  
   compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,  
     int *, int *, branch_chain *, compile_data *, int *);  
   
647    
648    
649  /*************************************************  /*************************************************
# Line 742  return (*p == CHAR_RIGHT_CURLY_BRACKET); Line 749  return (*p == CHAR_RIGHT_CURLY_BRACKET);
749  *************************************************/  *************************************************/
750    
751  /* This function is called when a \ has been encountered. It either returns a  /* This function is called when a \ has been encountered. It either returns a
752  positive value for a simple escape such as \n, or a negative value which  positive value for a special escape such as \d, or 0 for a data character
753  encodes one of the more complicated things such as \d. A backreference to group  which will be placed in chptr. A backreference to group
754  n is returned as -(ESC_REF + n); ESC_REF is the highest ESC_xxx macro. When  n is returned as ESC_REF + n; ESC_REF is the highest ESC_xxx macro. When
755  UTF-8 is enabled, a positive value greater than 255 may be returned. On entry,  UTF-8 is enabled, a positive value greater than 255 may be returned in chptr.
756  ptr is pointing at the \. On exit, it is on the final character of the escape  On entry, ptr is pointing at the \. On exit, it is on the final character of the
757  sequence.  escape sequence.
758    
759  Arguments:  Arguments:
760    ptrptr         points to the pattern position pointer    ptrptr         points to the pattern position pointer
761      chptr          points to the data character
762    errorcodeptr   points to the errorcode variable    errorcodeptr   points to the errorcode variable
763    bracount       number of previous extracting brackets    bracount       number of previous extracting brackets
764    options        the options bits    options        the options bits
765    isclass        TRUE if inside a character class    isclass        TRUE if inside a character class
766    
767  Returns:         zero or positive => a data character  Returns:         zero => a data character
768                   negative => a special escape sequence                   positive => a special escape sequence
769                   on error, errorcodeptr is set                   on error, errorcodeptr is set
770  */  */
771    
772  static int  static int
773  check_escape(const pcre_uchar **ptrptr, int *errorcodeptr, int bracount,  check_escape(const pcre_uchar **ptrptr, int *chptr, int *errorcodeptr,
774    int options, BOOL isclass)    int bracount, int options, BOOL isclass)
775  {  {
776  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
777  BOOL utf = (options & PCRE_UTF8) != 0;  BOOL utf = (options & PCRE_UTF8) != 0;
778  const pcre_uchar *ptr = *ptrptr + 1;  const pcre_uchar *ptr = *ptrptr + 1;
779  pcre_int32 c;  pcre_uint32 c;
780    int escape = 0;
781  int i;  int i;
782    
783  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */
# Line 785  Otherwise further processing may be requ Line 794  Otherwise further processing may be requ
794  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
795  /* Not alphanumeric */  /* Not alphanumeric */
796  else if (c < CHAR_0 || c > CHAR_z) {}  else if (c < CHAR_0 || c > CHAR_z) {}
797  else if ((i = escapes[c - CHAR_0]) != 0) c = i;  else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = i; else escape = -i; }
798    
799  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
800  /* Not alphanumeric */  /* Not alphanumeric */
801  else if (c < 'a' || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}  else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
802  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  { if (i > 0) c = i; else escape = -i; }
803  #endif  #endif
804    
805  /* Escapes that need further processing, or are illegal. */  /* Escapes that need further processing, or are illegal. */
# Line 823  else Line 832  else
832          c = 0;          c = 0;
833          for (i = 0; i < 4; ++i)          for (i = 0; i < 4; ++i)
834            {            {
835            register int cc = *(++ptr);            register pcre_uint32 cc = *(++ptr);
836  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
837            if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */            if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
838            c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
# Line 833  else Line 842  else
842  #endif  #endif
843            }            }
844    
845  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
846          if (c > (utf ? 0x10ffff : 0xff))          if (c > (utf ? 0x10ffff : 0xff))
847  #else  #elif defined COMPILE_PCRE16
 #ifdef COMPILE_PCRE16  
848          if (c > (utf ? 0x10ffff : 0xffff))          if (c > (utf ? 0x10ffff : 0xffff))
849  #endif  #elif defined COMPILE_PCRE32
850            if (utf && c > 0x10ffff)
851  #endif  #endif
852            {            {
853            *errorcodeptr = ERR76;            *errorcodeptr = ERR76;
# Line 870  else Line 879  else
879      (3) For Oniguruma compatibility we also support \g followed by a name or a      (3) For Oniguruma compatibility we also support \g followed by a name or a
880      number either in angle brackets or in single quotes. However, these are      number either in angle brackets or in single quotes. However, these are
881      (possibly recursive) subroutine calls, _not_ backreferences. Just return      (possibly recursive) subroutine calls, _not_ backreferences. Just return
882      the -ESC_g code (cf \k). */      the ESC_g code (cf \k). */
883    
884      case CHAR_g:      case CHAR_g:
885      if (isclass) break;      if (isclass) break;
886      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
887        {        {
888        c = -ESC_g;        escape = ESC_g;
889        break;        break;
890        }        }
891    
# Line 889  else Line 898  else
898          if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;          if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
899        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
900          {          {
901          c = -ESC_k;          escape = ESC_k;
902          break;          break;
903          }          }
904        braced = TRUE;        braced = TRUE;
# Line 945  else Line 954  else
954        c = bracount - (c - 1);        c = bracount - (c - 1);
955        }        }
956    
957      c = -(ESC_REF + c);      escape = ESC_REF + c;
958      break;      break;
959    
960      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 986  else Line 995  else
995          }          }
996        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
997          {          {
998          c = -(ESC_REF + c);          escape = ESC_REF + c;
999          break;          break;
1000          }          }
1001        ptr = oldptr;      /* Put the pointer back and fall through */        ptr = oldptr;      /* Put the pointer back and fall through */
# Line 1033  else Line 1042  else
1042          c = 0;          c = 0;
1043          for (i = 0; i < 2; ++i)          for (i = 0; i < 2; ++i)
1044            {            {
1045            register int cc = *(++ptr);            register pcre_uint32 cc = *(++ptr);
1046  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1047            if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */            if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
1048            c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
# Line 1049  else Line 1058  else
1058      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
1059        {        {
1060        const pcre_uchar *pt = ptr + 2;        const pcre_uchar *pt = ptr + 2;
1061          BOOL overflow;
1062    
1063        c = 0;        c = 0;
1064          overflow = FALSE;
1065        while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)        while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
1066          {          {
1067          register int cc = *pt++;          register pcre_uint32 cc = *pt++;
1068          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
1069    
1070    #ifdef COMPILE_PCRE32
1071            if (c >= 0x10000000l) { overflow = TRUE; break; }
1072    #endif
1073    
1074  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1075          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
1076          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
# Line 1064  else Line 1079  else
1079          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
1080  #endif  #endif
1081    
1082  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
1083          if (c > (utf ? 0x10ffff : 0xff)) { c = -1; break; }          if (c > (utf ? 0x10ffff : 0xff)) { overflow = TRUE; break; }
1084  #else  #elif defined COMPILE_PCRE16
1085  #ifdef COMPILE_PCRE16          if (c > (utf ? 0x10ffff : 0xffff)) { overflow = TRUE; break; }
1086          if (c > (utf ? 0x10ffff : 0xffff)) { c = -1; break; }  #elif defined COMPILE_PCRE32
1087  #endif          if (utf && c > 0x10ffff) { overflow = TRUE; break; }
1088  #endif  #endif
1089          }          }
1090    
1091        if (c < 0)        if (overflow)
1092          {          {
1093          while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;          while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;
1094          *errorcodeptr = ERR34;          *errorcodeptr = ERR34;
# Line 1095  else Line 1110  else
1110      c = 0;      c = 0;
1111      while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
1112        {        {
1113        int cc;                                  /* Some compilers don't like */        pcre_uint32 cc;                          /* Some compilers don't like */
1114        cc = *(++ptr);                           /* ++ in initializers */        cc = *(++ptr);                           /* ++ in initializers */
1115  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1116        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
# Line 1154  else Line 1169  else
1169  newline". PCRE does not support \N{name}. However, it does support  newline". PCRE does not support \N{name}. However, it does support
1170  quantification such as \N{2,3}. */  quantification such as \N{2,3}. */
1171    
1172  if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET &&  if (escape == ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET &&
1173       !is_counted_repeat(ptr+2))       !is_counted_repeat(ptr+2))
1174    *errorcodeptr = ERR37;    *errorcodeptr = ERR37;
1175    
1176  /* If PCRE_UCP is set, we change the values for \d etc. */  /* If PCRE_UCP is set, we change the values for \d etc. */
1177    
1178  if ((options & PCRE_UCP) != 0 && c <= -ESC_D && c >= -ESC_w)  if ((options & PCRE_UCP) != 0 && escape >= ESC_D && escape <= ESC_w)
1179    c -= (ESC_DU - ESC_D);    escape += (ESC_DU - ESC_D);
1180    
1181  /* Set the pointer to the final character before returning. */  /* Set the pointer to the final character before returning. */
1182    
1183  *ptrptr = ptr;  *ptrptr = ptr;
1184  return c;  *chptr = c;
1185    return escape;
1186  }  }
1187    
   
   
1188  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1189  /*************************************************  /*************************************************
1190  *               Handle \P and \p                 *  *               Handle \P and \p                 *
# Line 1362  Arguments: Line 1376  Arguments:
1376    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1377    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1378    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1379    utf          TRUE if we are in UTF-8 / UTF-16 mode    utf          TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode
1380    count        pointer to the current capturing subpattern number (updated)    count        pointer to the current capturing subpattern number (updated)
1381    
1382  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
# Line 1596  Arguments: Line 1610  Arguments:
1610    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1611    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1612    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1613    utf          TRUE if we are in UTF-8 / UTF-16 mode    utf          TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode
1614    
1615  Returns:       the number of the found subpattern, or -1 if not found  Returns:       the number of the found subpattern, or -1 if not found
1616  */  */
# Line 1699  and doing the check at the end; a flag s Line 1713  and doing the check at the end; a flag s
1713    
1714  Arguments:  Arguments:
1715    code     points to the start of the pattern (the bracket)    code     points to the start of the pattern (the bracket)
1716    utf      TRUE in UTF-8 / UTF-16 mode    utf      TRUE in UTF-8 / UTF-16 / UTF-32 mode
1717    atend    TRUE if called when the pattern is complete    atend    TRUE if called when the pattern is complete
1718    cd       the "compile data" structure    cd       the "compile data" structure
1719    
# Line 1833  for (;;) Line 1847  for (;;)
1847      case OP_NOTI:      case OP_NOTI:
1848      branchlength++;      branchlength++;
1849      cc += 2;      cc += 2;
1850  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1851      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1852  #endif  #endif
1853      break;      break;
# Line 1847  for (;;) Line 1861  for (;;)
1861      case OP_NOTEXACTI:      case OP_NOTEXACTI:
1862      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1863      cc += 2 + IMM2_SIZE;      cc += 2 + IMM2_SIZE;
1864  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1865      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1866  #endif  #endif
1867      break;      break;
1868    
1869      case OP_TYPEEXACT:      case OP_TYPEEXACT:
1870      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1871      if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;      if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP)
1872          cc += 2;
1873      cc += 1 + IMM2_SIZE + 1;      cc += 1 + IMM2_SIZE + 1;
1874      break;      break;
1875    
# Line 1889  for (;;) Line 1904  for (;;)
1904    
1905      /* Check a class for variable quantification */      /* Check a class for variable quantification */
1906    
1907  #if defined SUPPORT_UTF || defined COMPILE_PCRE16  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1908      case OP_XCLASS:      case OP_XCLASS:
1909      cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];      cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
1910      /* Fall through */      /* Fall through */
# Line 2028  length. Line 2043  length.
2043    
2044  Arguments:  Arguments:
2045    code        points to start of expression    code        points to start of expression
2046    utf         TRUE in UTF-8 / UTF-16 mode    utf         TRUE in UTF-8 / UTF-16 / UTF-32 mode
2047    number      the required bracket number or negative to find a lookbehind    number      the required bracket number or negative to find a lookbehind
2048    
2049  Returns:      pointer to the opcode for the bracket, or NULL if not found  Returns:      pointer to the opcode for the bracket, or NULL if not found
# Line 2092  for (;;) Line 2107  for (;;)
2107        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
2108        case OP_TYPEEXACT:        case OP_TYPEEXACT:
2109        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
2110        if (code[1 + IMM2_SIZE] == OP_PROP        if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2111          || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;          code += 2;
2112        break;        break;
2113    
2114        case OP_MARK:        case OP_MARK:
# Line 2115  for (;;) Line 2130  for (;;)
2130    a multi-byte character. The length in the table is a minimum, so we have to    a multi-byte character. The length in the table is a minimum, so we have to
2131    arrange to skip the extra bytes. */    arrange to skip the extra bytes. */
2132    
2133  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2134      if (utf) switch(c)      if (utf) switch(c)
2135        {        {
2136        case OP_CHAR:        case OP_CHAR:
# Line 2167  instance of OP_RECURSE. Line 2182  instance of OP_RECURSE.
2182    
2183  Arguments:  Arguments:
2184    code        points to start of expression    code        points to start of expression
2185    utf         TRUE in UTF-8 / UTF-16 mode    utf         TRUE in UTF-8 / UTF-16 / UTF-32 mode
2186    
2187  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found
2188  */  */
# Line 2212  for (;;) Line 2227  for (;;)
2227        case OP_TYPEUPTO:        case OP_TYPEUPTO:
2228        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
2229        case OP_TYPEEXACT:        case OP_TYPEEXACT:
2230        if (code[1 + IMM2_SIZE] == OP_PROP        if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2231          || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;          code += 2;
2232        break;        break;
2233    
2234        case OP_MARK:        case OP_MARK:
# Line 2235  for (;;) Line 2250  for (;;)
2250      by a multi-byte character. The length in the table is a minimum, so we have      by a multi-byte character. The length in the table is a minimum, so we have
2251      to arrange to skip the extra bytes. */      to arrange to skip the extra bytes. */
2252    
2253  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2254      if (utf) switch(c)      if (utf) switch(c)
2255        {        {
2256        case OP_CHAR:        case OP_CHAR:
# Line 2321  bracket whose current branch will alread Line 2336  bracket whose current branch will alread
2336  Arguments:  Arguments:
2337    code        points to start of search    code        points to start of search
2338    endcode     points to where to stop    endcode     points to where to stop
2339    utf         TRUE if in UTF-8 / UTF-16 mode    utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
2340    cd          contains pointers to tables etc.    cd          contains pointers to tables etc.
2341    
2342  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
# Line 2538  for (code = first_significant_code(code Line 2553  for (code = first_significant_code(code
2553      case OP_TYPEUPTO:      case OP_TYPEUPTO:
2554      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
2555      case OP_TYPEPOSUPTO:      case OP_TYPEPOSUPTO:
2556      if (code[1 + IMM2_SIZE] == OP_PROP      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2557        || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;        code += 2;
2558      break;      break;
2559    
2560      /* End of branch */      /* End of branch */
# Line 2554  for (code = first_significant_code(code Line 2569  for (code = first_significant_code(code
2569      /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,      /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
2570      MINUPTO, and POSUPTO may be followed by a multibyte character */      MINUPTO, and POSUPTO may be followed by a multibyte character */
2571    
2572  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2573      case OP_STAR:      case OP_STAR:
2574      case OP_STARI:      case OP_STARI:
2575      case OP_MINSTAR:      case OP_MINSTAR:
# Line 2620  Arguments: Line 2635  Arguments:
2635    code        points to start of the recursion    code        points to start of the recursion
2636    endcode     points to where to stop (current RECURSE item)    endcode     points to where to stop (current RECURSE item)
2637    bcptr       points to the chain of current (unclosed) branch starts    bcptr       points to the chain of current (unclosed) branch starts
2638    utf         TRUE if in UTF-8 / UTF-16 mode    utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
2639    cd          pointers to tables etc    cd          pointers to tables etc
2640    
2641  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
# Line 2767  value in the reference (which is a group Line 2782  value in the reference (which is a group
2782  Arguments:  Arguments:
2783    group      points to the start of the group    group      points to the start of the group
2784    adjust     the amount by which the group is to be moved    adjust     the amount by which the group is to be moved
2785    utf        TRUE in UTF-8 / UTF-16 mode    utf        TRUE in UTF-8 / UTF-16 / UTF-32 mode
2786    cd         contains pointers to tables etc.    cd         contains pointers to tables etc.
2787    save_hwm   the hwm forward reference pointer at the start of the group    save_hwm   the hwm forward reference pointer at the start of the group
2788    
# Line 2871  PUT(previous_callout, 2 + LINK_SIZE, len Line 2886  PUT(previous_callout, 2 + LINK_SIZE, len
2886  *************************************************/  *************************************************/
2887    
2888  /* This function is passed the start and end of a class range, in UTF-8 mode  /* This function is passed the start and end of a class range, in UTF-8 mode
2889  with UCP support. It searches up the characters, looking for internal ranges of  with UCP support. It searches up the characters, looking for ranges of
2890  characters in the "other" case. Each call returns the next one, updating the  characters in the "other" case. Each call returns the next one, updating the
2891  start address.  start address. A character with multiple other cases is returned on its own
2892    with a special return value.
2893    
2894  Arguments:  Arguments:
2895    cptr        points to starting character value; updated    cptr        points to starting character value; updated
# Line 2881  Arguments: Line 2897  Arguments:
2897    ocptr       where to put start of othercase range    ocptr       where to put start of othercase range
2898    odptr       where to put end of othercase range    odptr       where to put end of othercase range
2899    
2900  Yield:        TRUE when range returned; FALSE when no more  Yield:        -1 when no more
2901                   0 when a range is returned
2902                  >0 the CASESET offset for char with multiple other cases
2903                    in this case, ocptr contains the original
2904  */  */
2905    
2906  static BOOL  static int
2907  get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,  get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,
2908    unsigned int *odptr)    unsigned int *odptr)
2909  {  {
2910  unsigned int c, othercase, next;  unsigned int c, othercase, next;
2911    int co;
2912    
2913    /* Find the first character that has an other case. If it has multiple other
2914    cases, return its case offset value. */
2915    
2916  for (c = *cptr; c <= d; c++)  for (c = *cptr; c <= d; c++)
2917    { if ((othercase = UCD_OTHERCASE(c)) != c) break; }    {
2918      if ((co = UCD_CASESET(c)) != 0)
2919        {
2920        *ocptr = c++;   /* Character that has the set */
2921        *cptr = c;      /* Rest of input range */
2922        return co;
2923        }
2924      if ((othercase = UCD_OTHERCASE(c)) != c) break;
2925      }
2926    
2927  if (c > d) return FALSE;  if (c > d) return -1;  /* Reached end of range */
2928    
2929  *ocptr = othercase;  *ocptr = othercase;
2930  next = othercase + 1;  next = othercase + 1;
# Line 2904  for (++c; c <= d; c++) Line 2935  for (++c; c <= d; c++)
2935    next++;    next++;
2936    }    }
2937    
2938  *odptr = next - 1;  *odptr = next - 1;     /* End of othercase range */
2939  *cptr = c;  *cptr = c;             /* Rest of input range */
2940    return 0;
 return TRUE;  
2941  }  }
2942    
2943    
# Line 2931  Returns:       TRUE if auto-possessifyin Line 2961  Returns:       TRUE if auto-possessifyin
2961  static BOOL  static BOOL
2962  check_char_prop(int c, int ptype, int pdata, BOOL negated)  check_char_prop(int c, int ptype, int pdata, BOOL negated)
2963  {  {
2964    #ifdef SUPPORT_UCP
2965    const pcre_uint32 *p;
2966    #endif
2967    
2968  const ucd_record *prop = GET_UCD(c);  const ucd_record *prop = GET_UCD(c);
2969    
2970  switch(ptype)  switch(ptype)
2971    {    {
2972    case PT_LAMP:    case PT_LAMP:
# Line 2969  switch(ptype) Line 3004  switch(ptype)
3004    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
3005            PRIV(ucp_gentype)[prop->chartype] == ucp_N ||            PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
3006            c == CHAR_UNDERSCORE) == negated;            c == CHAR_UNDERSCORE) == negated;
3007    
3008    #ifdef SUPPORT_UCP
3009      case PT_CLIST:
3010      p = PRIV(ucd_caseless_sets) + prop->caseset;
3011      for (;;)
3012        {
3013        if ((unsigned int)c < *p) return !negated;
3014        if ((unsigned int)c == *p++) return negated;
3015        }
3016      break;  /* Control never reaches here */
3017    #endif
3018    }    }
3019    
3020  return FALSE;  return FALSE;
3021  }  }
3022  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2986  sense to automatically possessify the re Line 3033  sense to automatically possessify the re
3033    
3034  Arguments:  Arguments:
3035    previous      pointer to the repeated opcode    previous      pointer to the repeated opcode
3036    utf           TRUE in UTF-8 / UTF-16 mode    utf           TRUE in UTF-8 / UTF-16 / UTF-32 mode
3037    ptr           next character in pattern    ptr           next character in pattern
3038    options       options bits    options       options bits
3039    cd            contains pointers to tables etc.    cd            contains pointers to tables etc.
# Line 2998  static BOOL Line 3045  static BOOL
3045  check_auto_possessive(const pcre_uchar *previous, BOOL utf,  check_auto_possessive(const pcre_uchar *previous, BOOL utf,
3046    const pcre_uchar *ptr, int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
3047  {  {
3048  pcre_int32 c, next;  pcre_int32 c = NOTACHAR; // FIXMEchpe pcre_uint32
3049    pcre_int32 next;
3050    int escape;
3051  int op_code = *previous++;  int op_code = *previous++;
3052    
3053  /* Skip whitespace and comments in extended mode */  /* Skip whitespace and comments in extended mode */
# Line 3030  value is a character, a negative value i Line 3079  value is a character, a negative value i
3079  if (*ptr == CHAR_BACKSLASH)  if (*ptr == CHAR_BACKSLASH)
3080    {    {
3081    int temperrorcode = 0;    int temperrorcode = 0;
3082    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);    escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE);
3083    if (temperrorcode != 0) return FALSE;    if (temperrorcode != 0) return FALSE;
3084    ptr++;    /* Point after the escape sequence */    ptr++;    /* Point after the escape sequence */
3085    }    }
3086  else if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_meta) == 0)  else if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_meta) == 0)
3087    {    {
3088      escape = 0;
3089  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3090    if (utf) { GETCHARINC(next, ptr); } else    if (utf) { GETCHARINC(next, ptr); } else
3091  #endif  #endif
# Line 3072  if (*ptr == CHAR_ASTERISK || *ptr == CHA Line 3122  if (*ptr == CHAR_ASTERISK || *ptr == CHA
3122    STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)    STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
3123      return FALSE;      return FALSE;
3124    
3125  /* Now compare the next item with the previous opcode. First, handle cases when  /* If the previous item is a character, get its value. */
 the next item is a character. */  
3126    
3127  if (next >= 0) switch(op_code)  if (op_code == OP_CHAR || op_code == OP_CHARI ||
3128        op_code == OP_NOT || op_code == OP_NOTI)
3129      //if (escape == 0) switch(op_code)
3130    {    {
   case OP_CHAR:  
3131  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3132    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
3133  #else  #else
3134    c = *previous;    c = *previous;
3135  #endif  #endif
3136    return c != next;    }
3137    
3138    /* For CHARI (caseless character) we must check the other case. If we have  /* Now compare the next item with the previous opcode. First, handle cases when
3139    Unicode property support, we can use it to test the other case of  the next item is a character. */
   high-valued characters. */  
3140    
3141    case OP_CHARI:  if (escape == 0)
3142  #ifdef SUPPORT_UTF    {
3143    GETCHARTEST(c, previous);    /* For a caseless UTF match, the next character may have more than one other
3144  #else    case, which maps to the special PT_CLIST property. Check this first. */
3145    c = *previous;  
 #endif  
   if (c == next) return FALSE;  
 #ifdef SUPPORT_UTF  
   if (utf)  
     {  
     unsigned int othercase;  
     if (next < 128) othercase = cd->fcc[next]; else  
3146  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3147      othercase = UCD_OTHERCASE((unsigned int)next);    if (utf && (unsigned int)c != NOTACHAR && (options & PCRE_CASELESS) != 0)
3148  #else      {
3149      othercase = NOTACHAR;      int ocs = UCD_CASESET(next);
3150  #endif      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);
     return (unsigned int)c != othercase;  
3151      }      }
   else  
 #endif  /* SUPPORT_UTF */  
   return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */  
   
   case OP_NOT:  
 #ifdef SUPPORT_UTF  
   GETCHARTEST(c, previous);  
 #else  
   c = *previous;  
3152  #endif  #endif
   return c == next;  
3153    
3154    case OP_NOTI:    switch(op_code)
3155        {
3156        case OP_CHAR:
3157        return c != next;
3158    
3159        /* For CHARI (caseless character) we must check the other case. If we have
3160        Unicode property support, we can use it to test the other case of
3161        high-valued characters. We know that next can have only one other case,
3162        because multi-other-case characters are dealt with above. */
3163    
3164        case OP_CHARI:
3165        if (c == next) return FALSE;
3166  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3167    GETCHARTEST(c, previous);      if (utf)
3168          {
3169          unsigned int othercase;
3170          if (next < 128) othercase = cd->fcc[next]; else
3171    #ifdef SUPPORT_UCP
3172          othercase = UCD_OTHERCASE((unsigned int)next);
3173  #else  #else
3174    c = *previous;        othercase = NOTACHAR;
3175  #endif  #endif
3176    if (c == next) return TRUE;        return (unsigned int)c != othercase;
3177          }
3178        else
3179    #endif  /* SUPPORT_UTF */
3180        return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Not UTF */
3181    
3182        case OP_NOT:
3183        return c == next;
3184    
3185        case OP_NOTI:
3186        if (c == next) return TRUE;
3187  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3188    if (utf)      if (utf)
3189      {        {
3190      unsigned int othercase;        unsigned int othercase;
3191      if (next < 128) othercase = cd->fcc[next]; else        if (next < 128) othercase = cd->fcc[next]; else
3192  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3193      othercase = UCD_OTHERCASE((unsigned int)next);        othercase = UCD_OTHERCASE((unsigned int)next);
3194  #else  #else
3195      othercase = NOTACHAR;        othercase = NOTACHAR;
3196  #endif  #endif
3197      return (unsigned int)c == othercase;        return (unsigned int)c == othercase;
3198      }        }
3199    else      else
3200  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3201    return (c == TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */      return (c == TABLE_GET((unsigned int)next, cd->fcc, next));  /* Not UTF */
3202    
3203    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
3204    When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */      When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
3205    
3206    case OP_DIGIT:      case OP_DIGIT:
3207    return next > 255 || (cd->ctypes[next] & ctype_digit) == 0;      return next > 255 || (cd->ctypes[next] & ctype_digit) == 0;
3208    
3209    case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
3210    return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0;      return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0;
3211    
3212    case OP_WHITESPACE:      case OP_WHITESPACE:
3213    return next > 255 || (cd->ctypes[next] & ctype_space) == 0;      return next > 255 || (cd->ctypes[next] & ctype_space) == 0;
3214    
3215    case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
3216    return next <= 255 && (cd->ctypes[next] & ctype_space) != 0;      return next <= 255 && (cd->ctypes[next] & ctype_space) != 0;
3217    
3218    case OP_WORDCHAR:      case OP_WORDCHAR:
3219    return next > 255 || (cd->ctypes[next] & ctype_word) == 0;      return next > 255 || (cd->ctypes[next] & ctype_word) == 0;
3220    
3221    case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
3222    return next <= 255 && (cd->ctypes[next] & ctype_word) != 0;      return next <= 255 && (cd->ctypes[next] & ctype_word) != 0;
3223    
3224    case OP_HSPACE:      case OP_HSPACE:
3225    case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
3226    switch(next)      switch(next)
3227      {        {
3228      case 0x09:        HSPACE_CASES:
3229      case 0x20:        return op_code == OP_NOT_HSPACE;
     case 0xa0:  
     case 0x1680:  
     case 0x180e:  
     case 0x2000:  
     case 0x2001:  
     case 0x2002:  
     case 0x2003:  
     case 0x2004:  
     case 0x2005:  
     case 0x2006:  
     case 0x2007:  
     case 0x2008:  
     case 0x2009:  
     case 0x200A:  
     case 0x202f:  
     case 0x205f:  
     case 0x3000:  
     return op_code == OP_NOT_HSPACE;  
     default:  
     return op_code != OP_NOT_HSPACE;  
     }  
3230    
3231    case OP_ANYNL:        default:
3232    case OP_VSPACE:        return op_code != OP_NOT_HSPACE;
3233    case OP_NOT_VSPACE:        }
3234    switch(next)  
3235      {      case OP_ANYNL:
3236      case 0x0a:      case OP_VSPACE:
3237      case 0x0b:      case OP_NOT_VSPACE:
3238      case 0x0c:      switch(next)
3239      case 0x0d:        {
3240      case 0x85:        VSPACE_CASES:
3241      case 0x2028:        return op_code == OP_NOT_VSPACE;
3242      case 0x2029:  
3243      return op_code == OP_NOT_VSPACE;        default:
3244      default:        return op_code != OP_NOT_VSPACE;
3245      return op_code != OP_NOT_VSPACE;        }
     }  
3246    
3247  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3248    case OP_PROP:      case OP_PROP:
3249    return check_char_prop(next, previous[0], previous[1], FALSE);      return check_char_prop(next, previous[0], previous[1], FALSE);
3250    
3251    case OP_NOTPROP:      case OP_NOTPROP:
3252    return check_char_prop(next, previous[0], previous[1], TRUE);      return check_char_prop(next, previous[0], previous[1], TRUE);
3253  #endif  #endif
3254    
3255    default:      default:
3256    return FALSE;      return FALSE;
3257        }
3258    }    }
3259    
   
3260  /* Handle the case when the next item is \d, \s, etc. Note that when PCRE_UCP  /* Handle the case when the next item is \d, \s, etc. Note that when PCRE_UCP
3261  is set, \d turns into ESC_du rather than ESC_d, etc., so ESC_d etc. are  is set, \d turns into ESC_du rather than ESC_d, etc., so ESC_d etc. are
3262  generated only when PCRE_UCP is *not* set, that is, when only ASCII  generated only when PCRE_UCP is *not* set, that is, when only ASCII
# Line 3232  switch(op_code) Line 3267  switch(op_code)
3267    {    {
3268    case OP_CHAR:    case OP_CHAR:
3269    case OP_CHARI:    case OP_CHARI:
3270  #ifdef SUPPORT_UTF    switch(escape)
   GETCHARTEST(c, previous);  
 #else  
   c = *previous;  
 #endif  
   switch(-next)  
3271      {      {
3272      case ESC_d:      case ESC_d:
3273      return c > 255 || (cd->ctypes[c] & ctype_digit) == 0;      return c > 255 || (cd->ctypes[c] & ctype_digit) == 0;
# Line 3261  switch(op_code) Line 3291  switch(op_code)
3291      case ESC_H:      case ESC_H:
3292      switch(c)      switch(c)
3293        {        {
3294        case 0x09:        HSPACE_CASES:
3295        case 0x20:        return escape != ESC_h;
3296        case 0xa0:  
       case 0x1680:  
       case 0x180e:  
       case 0x2000:  
       case 0x2001:  
       case 0x2002:  
       case 0x2003:  
       case 0x2004:  
       case 0x2005:  
       case 0x2006:  
       case 0x2007:  
       case 0x2008:  
       case 0x2009:  
       case 0x200A:  
       case 0x202f:  
       case 0x205f:  
       case 0x3000:  
       return -next != ESC_h;  
3297        default:        default:
3298        return -next == ESC_h;        return escape == ESC_h;
3299        }        }
3300    
3301      case ESC_v:      case ESC_v:
3302      case ESC_V:      case ESC_V:
3303      switch(c)      switch(c)
3304        {        {
3305        case 0x0a:        VSPACE_CASES:
3306        case 0x0b:        return escape != ESC_v;
3307        case 0x0c:  
       case 0x0d:  
       case 0x85:  
       case 0x2028:  
       case 0x2029:  
       return -next != ESC_v;  
3308        default:        default:
3309        return -next == ESC_v;        return escape == ESC_v;
3310        }        }
3311    
3312      /* When PCRE_UCP is set, these values get generated for \d etc. Find      /* When PCRE_UCP is set, these values get generated for \d etc. Find
3313      their substitutions and process them. The result will always be either      their substitutions and process them. The result will always be either
3314      -ESC_p or -ESC_P. Then fall through to process those values. */      ESC_p or ESC_P. Then fall through to process those values. */
3315    
3316  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3317      case ESC_du:      case ESC_du:
# Line 3314  switch(op_code) Line 3322  switch(op_code)
3322      case ESC_SU:      case ESC_SU:
3323        {        {
3324        int temperrorcode = 0;        int temperrorcode = 0;
3325        ptr = substitutes[-next - ESC_DU];        ptr = substitutes[escape - ESC_DU];
3326        next = check_escape(&ptr, &temperrorcode, 0, options, FALSE);        escape = check_escape(&ptr, &next, &temperrorcode, 0, options, FALSE);
3327        if (temperrorcode != 0) return FALSE;        if (temperrorcode != 0) return FALSE;
3328        ptr++;    /* For compatibility */        ptr++;    /* For compatibility */
3329        }        }
# Line 3342  switch(op_code) Line 3350  switch(op_code)
3350    
3351        /* Do the property check. */        /* Do the property check. */
3352    
3353        return check_char_prop(c, ptype, pdata, (next == -ESC_P) != negated);        return check_char_prop(c, ptype, pdata, (escape == ESC_P) != negated);
3354        }        }
3355  #endif  #endif
3356    
# Line 3357  switch(op_code) Line 3365  switch(op_code)
3365    these op-codes are never generated.) */    these op-codes are never generated.) */
3366    
3367    case OP_DIGIT:    case OP_DIGIT:
3368    return next == -ESC_D || next == -ESC_s || next == -ESC_W ||    return escape == ESC_D || escape == ESC_s || escape == ESC_W ||
3369           next == -ESC_h || next == -ESC_v || next == -ESC_R;           escape == ESC_h || escape == ESC_v || escape == ESC_R;
3370    
3371    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
3372    return next == -ESC_d;    return escape == ESC_d;
3373    
3374    case OP_WHITESPACE:    case OP_WHITESPACE:
3375    return next == -ESC_S || next == -ESC_d || next == -ESC_w;    return escape == ESC_S || escape == ESC_d || escape == ESC_w;
3376    
3377    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
3378    return next == -ESC_s || next == -ESC_h || next == -ESC_v || next == -ESC_R;    return escape == ESC_s || escape == ESC_h || escape == ESC_v || escape == ESC_R;
3379    
3380    case OP_HSPACE:    case OP_HSPACE:
3381    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||    return escape == ESC_S || escape == ESC_H || escape == ESC_d ||
3382           next == -ESC_w || next == -ESC_v || next == -ESC_R;           escape == ESC_w || escape == ESC_v || escape == ESC_R;
3383    
3384    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
3385    return next == -ESC_h;    return escape == ESC_h;
3386    
3387    /* Can't have \S in here because VT matches \S (Perl anomaly) */    /* Can't have \S in here because VT matches \S (Perl anomaly) */
3388    case OP_ANYNL:    case OP_ANYNL:
3389    case OP_VSPACE:    case OP_VSPACE:
3390    return next == -ESC_V || next == -ESC_d || next == -ESC_w;    return escape == ESC_V || escape == ESC_d || escape == ESC_w;
3391    
3392    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
3393    return next == -ESC_v || next == -ESC_R;    return escape == ESC_v || escape == ESC_R;
3394    
3395    case OP_WORDCHAR:    case OP_WORDCHAR:
3396    return next == -ESC_W || next == -ESC_s || next == -ESC_h ||    return escape == ESC_W || escape == ESC_s || escape == ESC_h ||
3397           next == -ESC_v || next == -ESC_R;           escape == ESC_v || escape == ESC_R;
3398    
3399    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
3400    return next == -ESC_w || next == -ESC_d;    return escape == ESC_w || escape == ESC_d;
3401    
3402    default:    default:
3403    return FALSE;    return FALSE;
# Line 3401  switch(op_code) Line 3409  switch(op_code)
3409    
3410    
3411  /*************************************************  /*************************************************
3412    *        Add a character or range to a class     *
3413    *************************************************/
3414    
3415    /* This function packages up the logic of adding a character or range of
3416    characters to a class. The character values in the arguments will be within the
3417    valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
3418    mutually recursive with the function immediately below.
3419    
3420    Arguments:
3421      classbits     the bit map for characters < 256
3422      uchardptr     points to the pointer for extra data
3423      options       the options word
3424      cd            contains pointers to tables etc.
3425      start         start of range character
3426      end           end of range character
3427    
3428    Returns:        the number of < 256 characters added
3429                    the pointer to extra data is updated
3430    */
3431    
3432    static int
3433    add_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options,
3434      compile_data *cd, pcre_uint32 start, pcre_uint32 end)
3435    {
3436    pcre_uint32 c;
3437    int n8 = 0;
3438    
3439    /* If caseless matching is required, scan the range and process alternate
3440    cases. In Unicode, there are 8-bit characters that have alternate cases that
3441    are greater than 255 and vice-versa. Sometimes we can just extend the original
3442    range. */
3443    
3444    if ((options & PCRE_CASELESS) != 0)
3445      {
3446    #ifdef SUPPORT_UCP
3447      if ((options & PCRE_UTF8) != 0)
3448        {
3449        int rc;
3450        pcre_uint32 oc, od;
3451    
3452        options &= ~PCRE_CASELESS;   /* Remove for recursive calls */
3453        c = start;
3454    
3455        while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
3456          {
3457          /* Handle a single character that has more than one other case. */
3458    
3459          if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cd,
3460            PRIV(ucd_caseless_sets) + rc, oc);
3461    
3462          /* Do nothing if the other case range is within the original range. */
3463    
3464          else if (oc >= start && od <= end) continue;
3465    
3466          /* Extend the original range if there is overlap, noting that if oc < c, we
3467          can't have od > end because a subrange is always shorter than the basic
3468          range. Otherwise, use a recursive call to add the additional range. */
3469    
3470          else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
3471          else if (od > end && oc <= end + 1) end = od;       /* Extend upwards */
3472          else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od);
3473          }
3474        }
3475      else
3476    #endif  /* SUPPORT_UCP */
3477    
3478      /* Not UTF-mode, or no UCP */
3479    
3480      for (c = start; c <= end && c < 256; c++)
3481        {
3482        SETBIT(classbits, cd->fcc[c]);
3483        n8++;
3484        }
3485      }
3486    
3487    /* Now handle the original range. Adjust the final value according to the bit
3488    length - this means that the same lists of (e.g.) horizontal spaces can be used
3489    in all cases. */
3490    
3491    #if defined COMPILE_PCRE8
3492    #ifdef SUPPORT_UTF
3493      if ((options & PCRE_UTF8) == 0)
3494    #endif
3495      if (end > 0xff) end = 0xff;
3496    
3497    #elif defined COMPILE_PCRE16
3498    #ifdef SUPPORT_UTF
3499      if ((options & PCRE_UTF16) == 0)
3500    #endif
3501      if (end > 0xffff) end = 0xffff;
3502    
3503    #endif /* COMPILE_PCRE[8|16] */
3504    
3505    /* If all characters are less than 256, use the bit map. Otherwise use extra
3506    data. */
3507    
3508    if (end < 0x100)
3509      {
3510      for (c = start; c <= end; c++)
3511        {
3512        n8++;
3513        SETBIT(classbits, c);
3514        }
3515      }
3516    
3517    else
3518      {
3519      pcre_uchar *uchardata = *uchardptr;
3520    
3521    #ifdef SUPPORT_UTF
3522      if ((options & PCRE_UTF8) != 0)  /* All UTFs use the same flag bit */
3523        {
3524        if (start < end)
3525          {
3526          *uchardata++ = XCL_RANGE;
3527          uchardata += PRIV(ord2utf)(start, uchardata);
3528          uchardata += PRIV(ord2utf)(end, uchardata);
3529          }
3530        else if (start == end)
3531          {
3532          *uchardata++ = XCL_SINGLE;
3533          uchardata += PRIV(ord2utf)(start, uchardata);
3534          }
3535        }
3536      else
3537    #endif  /* SUPPORT_UTF */
3538    
3539      /* Without UTF support, character values are constrained by the bit length,
3540      and can only be > 255 for 16-bit and 32-bit libraries. */
3541    
3542    #ifdef COMPILE_PCRE8
3543        {}
3544    #else
3545      if (start < end)
3546        {
3547        *uchardata++ = XCL_RANGE;
3548        *uchardata++ = start;
3549        *uchardata++ = end;
3550        }
3551      else if (start == end)
3552        {
3553        *uchardata++ = XCL_SINGLE;
3554        *uchardata++ = start;
3555        }
3556    #endif
3557    
3558      *uchardptr = uchardata;   /* Update extra data pointer */
3559      }
3560    
3561    return n8;    /* Number of 8-bit characters */
3562    }
3563    
3564    
3565    
3566    
3567    /*************************************************
3568    *        Add a list of characters to a class     *
3569    *************************************************/
3570    
3571    /* This function is used for adding a list of case-equivalent characters to a
3572    class, and also for adding a list of horizontal or vertical whitespace. If the
3573    list is in order (which it should be), ranges of characters are detected and
3574    handled appropriately. This function is mutually recursive with the function
3575    above.
3576    
3577    Arguments:
3578      classbits     the bit map for characters < 256
3579      uchardptr     points to the pointer for extra data
3580      options       the options word
3581      cd            contains pointers to tables etc.
3582      p             points to row of 32-bit values, terminated by NOTACHAR
3583      except        character to omit; this is used when adding lists of
3584                      case-equivalent characters to avoid including the one we
3585                      already know about
3586    
3587    Returns:        the number of < 256 characters added
3588                    the pointer to extra data is updated
3589    */
3590    
3591    static int
3592    add_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options,
3593      compile_data *cd, const pcre_uint32 *p, unsigned int except)
3594    {
3595    int n8 = 0;
3596    while (p[0] < NOTACHAR)
3597      {
3598      int n = 0;
3599      if (p[0] != except)
3600        {
3601        while(p[n+1] == p[0] + n + 1) n++;
3602        n8 += add_to_class(classbits, uchardptr, options, cd, p[0], p[n]);
3603        }
3604      p += n + 1;
3605      }
3606    return n8;
3607    }
3608    
3609    
3610    
3611    /*************************************************
3612    *    Add characters not in a list to a class     *
3613    *************************************************/
3614    
3615    /* This function is used for adding the complement of a list of horizontal or
3616    vertical whitespace to a class. The list must be in order.
3617    
3618    Arguments:
3619      classbits     the bit map for characters < 256
3620      uchardptr     points to the pointer for extra data
3621      options       the options word
3622      cd            contains pointers to tables etc.
3623      p             points to row of 32-bit values, terminated by NOTACHAR
3624    
3625    Returns:        the number of < 256 characters added
3626                    the pointer to extra data is updated
3627    */
3628    
3629    static int
3630    add_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr,
3631      int options, compile_data *cd, const pcre_uint32 *p)
3632    {
3633    BOOL utf = (options & PCRE_UTF8) != 0;
3634    int n8 = 0;
3635    if (p[0] > 0)
3636      n8 += add_to_class(classbits, uchardptr, options, cd, 0, p[0] - 1);
3637    while (p[0] < NOTACHAR)
3638      {
3639      while (p[1] == p[0] + 1) p++;
3640      n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1,
3641        (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
3642      p++;
3643      }
3644    return n8;
3645    }
3646    
3647    
3648    
3649    /*************************************************
3650  *           Compile one branch                   *  *           Compile one branch                   *
3651  *************************************************/  *************************************************/
3652    
# Line 3444  int options = *optionsptr; Line 3690  int options = *optionsptr;
3690  int after_manual_callout = 0;  int after_manual_callout = 0;
3691  int length_prevgroup = 0;  int length_prevgroup = 0;
3692  register int c;  register int c;
3693    int escape;
3694  register pcre_uchar *code = *codeptr;  register pcre_uchar *code = *codeptr;
3695  pcre_uchar *last_code = code;  pcre_uchar *last_code = code;
3696  pcre_uchar *orig_code = code;  pcre_uchar *orig_code = code;
# Line 3463  must not do this for other options (e.g. Line 3710  must not do this for other options (e.g.
3710  dynamically as we process the pattern. */  dynamically as we process the pattern. */
3711    
3712  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3713  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
3714  BOOL utf = (options & PCRE_UTF8) != 0;  BOOL utf = (options & PCRE_UTF8) != 0;
3715  pcre_uchar utf_chars[6];  pcre_uchar utf_chars[6];
3716  #else  #else
3717  BOOL utf = FALSE;  BOOL utf = FALSE;
3718  #endif  #endif
3719    
3720  /* Helper variables for OP_XCLASS opcode (for characters > 255). */  /* Helper variables for OP_XCLASS opcode (for characters > 255). We define
3721    class_uchardata always so that it can be passed to add_to_class() always,
3722    though it will not be used in non-UTF 8-bit cases. This avoids having to supply
3723    alternative calls for the different cases. */
3724    
3725    pcre_uchar *class_uchardata;
3726  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3727  BOOL xclass;  BOOL xclass;
 pcre_uchar *class_uchardata;  
3728  pcre_uchar *class_uchardata_base;  pcre_uchar *class_uchardata_base;
3729  #endif  #endif
3730    
# Line 3518  for (;; ptr++) Line 3768  for (;; ptr++)
3768    BOOL is_recurse;    BOOL is_recurse;
3769    BOOL reset_bracount;    BOOL reset_bracount;
3770    int class_has_8bitchar;    int class_has_8bitchar;
3771    int class_single_char;    int class_one_char;
3772    int newoptions;    int newoptions;
3773    int recno;    int recno;
3774    int refsign;    int refsign;
# Line 3528  for (;; ptr++) Line 3778  for (;; ptr++)
3778    int terminator;    int terminator;
3779    int mclength;    int mclength;
3780    int tempbracount;    int tempbracount;
3781      int ec; // FIXMEchpe pcre_uint32
3782    pcre_uchar mcbuffer[8];    pcre_uchar mcbuffer[8];
3783    
3784    /* Get next character in the pattern */    /* Get next character in the pattern */
# Line 3816  for (;; ptr++) Line 4067  for (;; ptr++)
4067    
4068      should_flip_negation = FALSE;      should_flip_negation = FALSE;
4069    
4070      /* For optimization purposes, we track some properties of the class.      /* For optimization purposes, we track some properties of the class:
4071      class_has_8bitchar will be non-zero, if the class contains at least one      class_has_8bitchar will be non-zero if the class contains at least one <
4072      < 256 character. class_single_char will be 1 if the class contains only      256 character; class_one_char will be 1 if the class contains just one
4073      a single character. */      character. */
4074    
4075      class_has_8bitchar = 0;      class_has_8bitchar = 0;
4076      class_single_char = 0;      class_one_char = 0;
4077    
4078      /* Initialize the 32-char bit map to all zeros. We build the map in a      /* Initialize the 32-char bit map to all zeros. We build the map in a
4079      temporary bit of memory, in case the class contains only 1 character (less      temporary bit of memory, in case the class contains fewer than two
4080      than 256), because in that case the compiled code doesn't use the bit map.      8-bit characters because in that case the compiled code doesn't use the bit
4081      */      map. */
4082    
4083      memset(classbits, 0, 32 * sizeof(pcre_uint8));      memset(classbits, 0, 32 * sizeof(pcre_uint8));
4084    
4085  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4086      xclass = FALSE;                           /* No chars >= 256 */      xclass = FALSE;
4087      class_uchardata = code + LINK_SIZE + 2;   /* For UTF-8 items */      class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
4088      class_uchardata_base = class_uchardata;   /* For resetting in pass 1 */      class_uchardata_base = class_uchardata;   /* Save the start */
4089  #endif  #endif
4090    
4091      /* Process characters until ] is reached. By writing this as a "do" it      /* Process characters until ] is reached. By writing this as a "do" it
# Line 3845  for (;; ptr++) Line 4096  for (;; ptr++)
4096        {        {
4097        const pcre_uchar *oldptr;        const pcre_uchar *oldptr;
4098    
4099  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4100        if (utf && HAS_EXTRALEN(c))        if (utf && HAS_EXTRALEN(c))
4101          {                           /* Braces are required because the */          {                           /* Braces are required because the */
4102          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
# Line 3856  for (;; ptr++) Line 4107  for (;; ptr++)
4107        /* In the pre-compile phase, accumulate the length of any extra        /* In the pre-compile phase, accumulate the length of any extra
4108        data and reset the pointer. This is so that very large classes that        data and reset the pointer. This is so that very large classes that
4109        contain a zillion > 255 characters no longer overwrite the work space        contain a zillion > 255 characters no longer overwrite the work space
4110        (which is on the stack). */        (which is on the stack). We have to remember that there was XCLASS data,
4111          however. */
4112    
4113        if (lengthptr != NULL)        if (lengthptr != NULL && class_uchardata > class_uchardata_base)
4114          {          {
4115            xclass = TRUE;
4116          *lengthptr += class_uchardata - class_uchardata_base;          *lengthptr += class_uchardata - class_uchardata_base;
4117          class_uchardata = class_uchardata_base;          class_uchardata = class_uchardata_base;
4118          }          }
# Line 3918  for (;; ptr++) Line 4171  for (;; ptr++)
4171          alpha. This relies on the fact that the class table starts with          alpha. This relies on the fact that the class table starts with
4172          alpha, lower, upper as the first 3 entries. */          alpha, lower, upper as the first 3 entries. */
4173    
4174          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
4175            posix_class = 0;            posix_class = 0;
4176    
4177          /* When PCRE_UCP is set, some of the POSIX classes are converted to          /* When PCRE_UCP is set, some of the POSIX classes are converted to
# Line 3961  for (;; ptr++) Line 4214  for (;; ptr++)
4214              for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset];              for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset];
4215            }            }
4216    
4217          /* Not see if we need to remove any special characters. An option          /* Now see if we need to remove any special characters. An option
4218          value of 1 removes vertical space and 2 removes underscore. */          value of 1 removes vertical space and 2 removes underscore. */
4219    
4220          if (tabopt < 0) tabopt = -tabopt;          if (tabopt < 0) tabopt = -tabopt;
# Line 3977  for (;; ptr++) Line 4230  for (;; ptr++)
4230            for (c = 0; c < 32; c++) classbits[c] |= pbits[c];            for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
4231    
4232          ptr = tempptr + 1;          ptr = tempptr + 1;
4233          /* Every class contains at least one < 256 characters. */          /* Every class contains at least one < 256 character. */
4234          class_has_8bitchar = 1;          class_has_8bitchar = 1;
4235          /* Every class contains at least two characters. */          /* Every class contains at least two characters. */
4236          class_single_char = 2;          class_one_char = 2;
4237          continue;    /* End of POSIX syntax handling */          continue;    /* End of POSIX syntax handling */
4238          }          }
4239    
# Line 3988  for (;; ptr++) Line 4241  for (;; ptr++)
4241        of the specials, which just set a flag. The sequence \b is a special        of the specials, which just set a flag. The sequence \b is a special
4242        case. Inside a class (and only there) it is treated as backspace. We        case. Inside a class (and only there) it is treated as backspace. We
4243        assume that other escapes have more than one character in them, so        assume that other escapes have more than one character in them, so
4244        speculatively set both class_has_8bitchar and class_single_char bigger        speculatively set both class_has_8bitchar and class_one_char bigger
4245        than one. Unrecognized escapes fall through and are either treated        than one. Unrecognized escapes fall through and are either treated
4246        as literal characters (by default), or are faulted if        as literal characters (by default), or are faulted if
4247        PCRE_EXTRA is set. */        PCRE_EXTRA is set. */
4248    
4249        if (c == CHAR_BACKSLASH)        if (c == CHAR_BACKSLASH)
4250          {          {
4251          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);          escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE);
4252    
4253          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
4254    
4255          if (-c == ESC_b) c = CHAR_BS;    /* \b is backspace in a class */          if (escape == 0)
4256          else if (-c == ESC_N)            /* \N is not supported in a class */            c = ec;
4257            else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
4258            else if (escape == ESC_N)            /* \N is not supported in a class */
4259            {            {
4260            *errorcodeptr = ERR71;            *errorcodeptr = ERR71;
4261            goto FAILED;            goto FAILED;
4262            }            }
4263          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (escape == ESC_Q)            /* Handle start of quoted string */
4264            {            {
4265            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
4266              {              {
# Line 4013  for (;; ptr++) Line 4269  for (;; ptr++)
4269            else inescq = TRUE;            else inescq = TRUE;
4270            continue;            continue;
4271            }            }
4272          else if (-c == ESC_E) continue;  /* Ignore orphan \E */          else if (escape == ESC_E) continue;  /* Ignore orphan \E */
4273    
4274          if (c < 0)          else
4275            {            {
4276            register const pcre_uint8 *cbits = cd->cbits;            register const pcre_uint8 *cbits = cd->cbits;
4277            /* Every class contains at least two < 256 characters. */            /* Every class contains at least two < 256 characters. */
4278            class_has_8bitchar++;            class_has_8bitchar++;
4279            /* Every class contains at least two characters. */            /* Every class contains at least two characters. */
4280            class_single_char += 2;            class_one_char += 2;
4281    
4282            switch (-c)            switch (escape)
4283              {              {
4284  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4285              case ESC_du:     /* These are the values given for \d etc */              case ESC_du:     /* These are the values given for \d etc */
# Line 4033  for (;; ptr++) Line 4289  for (;; ptr++)
4289              case ESC_su:     /* of the default ASCII testing. */              case ESC_su:     /* of the default ASCII testing. */
4290              case ESC_SU:              case ESC_SU:
4291              nestptr = ptr;              nestptr = ptr;
4292              ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */              ptr = substitutes[escape - ESC_DU] - 1;  /* Just before substitute */
4293              class_has_8bitchar--;                /* Undo! */              class_has_8bitchar--;                /* Undo! */
4294              continue;              continue;
4295  #endif  #endif
# Line 4057  for (;; ptr++) Line 4313  for (;; ptr++)
4313    
4314              /* Perl 5.004 onwards omits VT from \s, but we must preserve it              /* Perl 5.004 onwards omits VT from \s, but we must preserve it
4315              if it was previously set by something earlier in the character              if it was previously set by something earlier in the character
4316              class. */              class. Luckily, the value of CHAR_VT is 0x0b in both ASCII and
4317                EBCDIC, so we lazily just adjust the appropriate bit. */
4318    
4319              case ESC_s:              case ESC_s:
4320              classbits[0] |= cbits[cbit_space];              classbits[0] |= cbits[cbit_space];
# Line 4070  for (;; ptr++) Line 4327  for (;; ptr++)
4327              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
4328              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
4329              continue;              continue;
4330    
4331                /* The rest apply in both UCP and non-UCP cases. */
4332    
4333              case ESC_h:              case ESC_h:
4334              SETBIT(classbits, 0x09); /* VT */              (void)add_list_to_class(classbits, &class_uchardata, options, cd,
4335              SETBIT(classbits, 0x20); /* SPACE */                PRIV(hspace_list), NOTACHAR);
             SETBIT(classbits, 0xa0); /* NSBP */  
 #ifndef COMPILE_PCRE8  
             xclass = TRUE;  
             *class_uchardata++ = XCL_SINGLE;  
             *class_uchardata++ = 0x1680;  
             *class_uchardata++ = XCL_SINGLE;  
             *class_uchardata++ = 0x180e;  
             *class_uchardata++ = XCL_RANGE;  
             *class_uchardata++ = 0x2000;  
             *class_uchardata++ = 0x200a;  
             *class_uchardata++ = XCL_SINGLE;  
             *class_uchardata++ = 0x202f;  
             *class_uchardata++ = XCL_SINGLE;  
             *class_uchardata++ = 0x205f;  
             *class_uchardata++ = XCL_SINGLE;  
             *class_uchardata++ = 0x3000;  
 #elif defined SUPPORT_UTF  
             if (utf)  
               {  
               xclass = TRUE;  
               *class_uchardata++ = XCL_SINGLE;  
               class_uchardata += PRIV(ord2utf)(0x1680, class_uchardata);  
               *class_uchardata++ = XCL_SINGLE;  
               class_uchardata += PRIV(ord2utf)(0x180e, class_uchardata);  
               *class_uchardata++ = XCL_RANGE;  
               class_uchardata += PRIV(ord2utf)(0x2000, class_uchardata);  
               class_uchardata += PRIV(ord2utf)(0x200a, class_uchardata);  
               *class_uchardata++ = XCL_SINGLE;  
               class_uchardata += PRIV(ord2utf)(0x202f, class_uchardata);  
               *class_uchardata++ = XCL_SINGLE;  
               class_uchardata += PRIV(ord2utf)(0x205f, class_uchardata);  
               *class_uchardata++ = XCL_SINGLE;  
               class_uchardata += PRIV(ord2utf)(0x3000, class_uchardata);  
               }  
 #endif  
4336              continue;              continue;
4337    
4338              case ESC_H:              case ESC_H:
4339              for (c = 0; c < 32; c++)              (void)add_not_list_to_class(classbits, &class_uchardata, options,
4340                {                cd, PRIV(hspace_list));
               int x = 0xff;  
               switch (c)  
                 {  
                 case 0x09/8: x ^= 1 << (0x09%8); break;  
                 case 0x20/8: x ^= 1 << (0x20%8); break;  
                 case 0xa0/8: x ^= 1 << (0xa0%8); break;  
                 default: break;  
                 }  
               classbits[c] |= x;  
               }  
 #ifndef COMPILE_PCRE8  
             xclass = TRUE;  
             *class_uchardata++ = XCL_RANGE;  
             *class_uchardata++ = 0x0100;  
             *class_uchardata++ = 0x167f;  
             *class_uchardata++ = XCL_RANGE;  
             *class_uchardata++ = 0x1681;  
             *class_uchardata++ = 0x180d;  
             *class_uchardata++ = XCL_RANGE;  
             *class_uchardata++ = 0x180f;  
             *class_uchardata++ = 0x1fff;  
             *class_uchardata++ = XCL_RANGE;  
             *class_uchardata++ = 0x200b;  
             *class_uchardata++ = 0x202e;  
             *class_uchardata++ = XCL_RANGE;  
             *class_uchardata++ = 0x2030;  
             *class_uchardata++ = 0x205e;  
             *class_uchardata++ = XCL_RANGE;  
             *class_uchardata++ = 0x2060;  
             *class_uchardata++ = 0x2fff;  
             *class_uchardata++ = XCL_RANGE;  
             *class_uchardata++ = 0x3001;  
 #ifdef SUPPORT_UTF  
             if (utf)  
               class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);  
             else  
 #endif  
               *class_uchardata++ = 0xffff;  
 #elif defined SUPPORT_UTF  
             if (utf)  
               {  
               xclass = TRUE;  
               *class_uchardata++ = XCL_RANGE;  
               class_uchardata += PRIV(ord2utf)(0x0100, class_uchardata);  
               class_uchardata += PRIV(ord2utf)(0x167f, class_uchardata);  
               *class_uchardata++ = XCL_RANGE;  
               class_uchardata += PRIV(ord2utf)(0x1681, class_uchardata);  
               class_uchardata += PRIV(ord2utf)(0x180d, class_uchardata);  
               *class_uchardata++ = XCL_RANGE;  
               class_uchardata += PRIV(ord2utf)(0x180f, class_uchardata);  
               class_uchardata += PRIV(ord2utf)(0x1fff, class_uchardata);  
               *class_uchardata++ = XCL_RANGE;  
               class_uchardata += PRIV(ord2utf)(0x200b, class_uchardata);  
               class_uchardata += PRIV(ord2utf)(0x202e, class_uchardata);  
               *class_uchardata++ = XCL_RANGE;  
               class_uchardata += PRIV(ord2utf)(0x2030, class_uchardata);  
               class_uchardata += PRIV(ord2utf)(0x205e, class_uchardata);  
               *class_uchardata++ = XCL_RANGE;  
               class_uchardata += PRIV(ord2utf)(0x2060, class_uchardata);  
               class_uchardata += PRIV(ord2utf)(0x2fff, class_uchardata);  
               *class_uchardata++ = XCL_RANGE;  
               class_uchardata += PRIV(ord2utf)(0x3001, class_uchardata);  
               class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);  
               }  
 #endif  
4341              continue;              continue;
4342    
4343              case ESC_v:              case ESC_v:
4344              SETBIT(classbits, 0x0a); /* LF */              (void)add_list_to_class(classbits, &class_uchardata, options, cd,
4345              SETBIT(classbits, 0x0b); /* VT */                PRIV(vspace_list), NOTACHAR);
             SETBIT(classbits, 0x0c); /* FF */  
             SETBIT(classbits, 0x0d); /* CR */  
             SETBIT(classbits, 0x85); /* NEL */  
 #ifndef COMPILE_PCRE8  
             xclass = TRUE;  
             *class_uchardata++ = XCL_RANGE;  
             *class_uchardata++ = 0x2028;  
             *class_uchardata++ = 0x2029;  
 #elif defined SUPPORT_UTF  
             if (utf)  
               {  
               xclass = TRUE;  
               *class_uchardata++ = XCL_RANGE;  
               class_uchardata += PRIV(ord2utf)(0x2028, class_uchardata);  
               class_uchardata += PRIV(ord2utf)(0x2029, class_uchardata);  
               }  
 #endif  
4346              continue;              continue;
4347    
4348              case ESC_V:              case ESC_V:
4349              for (c = 0; c < 32; c++)              (void)add_not_list_to_class(classbits, &class_uchardata, options,
4350                {                cd, PRIV(vspace_list));
               int x = 0xff;  
               switch (c)  
                 {  
                 case 0x0a/8: x ^= 1 << (0x0a%8);  
                              x ^= 1 << (0x0b%8);  
                              x ^= 1 << (0x0c%8);  
                              x ^= 1 << (0x0d%8);  
                              break;  
                 case 0x85/8: x ^= 1 << (0x85%8); break;  
                 default: break;  
                 }  
               classbits[c] |= x;  
               }  
   
 #ifndef COMPILE_PCRE8  
             xclass = TRUE;  
             *class_uchardata++ = XCL_RANGE;  
             *class_uchardata++ = 0x0100;  
             *class_uchardata++ = 0x2027;  
             *class_uchardata++ = XCL_RANGE;  
             *class_uchardata++ = 0x202a;  
 #ifdef SUPPORT_UTF  
             if (utf)  
               class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);  
             else  
 #endif  
               *class_uchardata++ = 0xffff;  
 #elif defined SUPPORT_UTF  
             if (utf)  
               {  
               xclass = TRUE;  
               *class_uchardata++ = XCL_RANGE;  
               class_uchardata += PRIV(ord2utf)(0x0100, class_uchardata);  
               class_uchardata += PRIV(ord2utf)(0x2027, class_uchardata);  
               *class_uchardata++ = XCL_RANGE;  
               class_uchardata += PRIV(ord2utf)(0x202a, class_uchardata);  
               class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);  
               }  
 #endif  
4351              continue;              continue;
4352    
4353  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 4255  for (;; ptr++) Line 4358  for (;; ptr++)
4358                int pdata;                int pdata;
4359                int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);                int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
4360                if (ptype < 0) goto FAILED;                if (ptype < 0) goto FAILED;
4361                xclass = TRUE;                *class_uchardata++ = ((escape == ESC_p) != negated)?
               *class_uchardata++ = ((-c == ESC_p) != negated)?  
4362                  XCL_PROP : XCL_NOTPROP;                  XCL_PROP : XCL_NOTPROP;
4363                *class_uchardata++ = ptype;                *class_uchardata++ = ptype;
4364                *class_uchardata++ = pdata;                *class_uchardata++ = pdata;
# Line 4275  for (;; ptr++) Line 4377  for (;; ptr++)
4377                goto FAILED;                goto FAILED;
4378                }                }
4379              class_has_8bitchar--;    /* Undo the speculative increase. */              class_has_8bitchar--;    /* Undo the speculative increase. */
4380              class_single_char -= 2;  /* Undo the speculative increase. */              class_one_char -= 2;     /* Undo the speculative increase. */
4381              c = *ptr;                /* Get the final character and fall through */              c = *ptr;                /* Get the final character and fall through */
4382              break;              break;
4383              }              }
4384            }            }
4385    
4386          /* Fall through if we have a single character (c >= 0). This may be          /* Fall through if the escape just defined a single character (c >= 0).
4387          greater than 256. */          This may be greater than 256. */
4388    
4389            escape = 0;
4390    
4391          }   /* End of backslash handling */          }   /* End of backslash handling */
4392    
4393        /* A single character may be followed by '-' to form a range. However,        /* A character may be followed by '-' to form a range. However, Perl does
4394        Perl does not permit ']' to be the end of the range. A '-' character        not permit ']' to be the end of the range. A '-' character at the end is
4395        at the end is treated as a literal. Perl ignores orphaned \E sequences        treated as a literal. Perl ignores orphaned \E sequences entirely. The
4396        entirely. The code for handling \Q and \E is messy. */        code for handling \Q and \E is messy. */
4397    
4398        CHECK_RANGE:        CHECK_RANGE:
4399        while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)        while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
# Line 4297  for (;; ptr++) Line 4401  for (;; ptr++)
4401          inescq = FALSE;          inescq = FALSE;
4402          ptr += 2;          ptr += 2;
4403          }          }
   
4404        oldptr = ptr;        oldptr = ptr;
4405    
4406        /* Remember \r or \n */        /* Remember if \r or \n were explicitly used */
4407    
4408        if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;        if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
4409    
# Line 4323  for (;; ptr++) Line 4426  for (;; ptr++)
4426            inescq = TRUE;            inescq = TRUE;
4427            break;            break;
4428            }            }
4429    
4430            /* Minus (hyphen) at the end of a class is treated as a literal, so put
4431            back the pointer and jump to handle the character that preceded it. */
4432    
4433          if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))          if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
4434            {            {
4435            ptr = oldptr;            ptr = oldptr;
4436            goto LONE_SINGLE_CHARACTER;            goto CLASS_SINGLE_CHARACTER;
4437            }            }
4438    
4439            /* Otherwise, we have a potential range; pick up the next character */
4440    
4441  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4442          if (utf)          if (utf)
# Line 4345  for (;; ptr++) Line 4453  for (;; ptr++)
4453    
4454          if (!inescq && d == CHAR_BACKSLASH)          if (!inescq && d == CHAR_BACKSLASH)
4455            {            {
4456            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            int descape;
4457              descape = check_escape(&ptr, &d, errorcodeptr, cd->bracount, options, TRUE);
4458            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
4459    
4460            /* \b is backspace; any other special means the '-' was literal */            /* \b is backspace; any other special means the '-' was literal. */
4461    
4462            if (d < 0)            if (descape > 0)
4463              {              {
4464              if (d == -ESC_b) d = CHAR_BS; else              if (descape == ESC_b) d = CHAR_BS; else
4465                {                {
4466                ptr = oldptr;                ptr = oldptr;
4467                goto LONE_SINGLE_CHARACTER;  /* A few lines below */                goto CLASS_SINGLE_CHARACTER;  /* A few lines below */
4468                }                }
4469              }              }
4470            }            }
4471    
4472          /* Check that the two values are in the correct order. Optimize          /* Check that the two values are in the correct order. Optimize
4473          one-character ranges */          one-character ranges. */
4474    
4475          if (d < c)          if (d < c)
4476            {            {
4477            *errorcodeptr = ERR8;            *errorcodeptr = ERR8;
4478            goto FAILED;            goto FAILED;
4479            }            }
4480            if (d == c) goto CLASS_SINGLE_CHARACTER;  /* A few lines below */
4481    
4482          if (d == c) goto LONE_SINGLE_CHARACTER;  /* A few lines below */          /* We have found a character range, so single character optimizations
4483            cannot be done anymore. Any value greater than 1 indicates that there
4484            is more than one character. */
4485    
4486            class_one_char = 2;
4487    
4488          /* Remember \r or \n */          /* Remember an explicit \r or \n, and add the range to the class. */
4489    
4490          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
4491    
4492          /* Since we found a character range, single character optimizations          class_has_8bitchar +=
4493          cannot be done anymore. */            add_to_class(classbits, &class_uchardata, options, cd, c, d);
4494          class_single_char = 2;  
   
         /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless  
         matching, we have to use an XCLASS with extra data items. Caseless  
         matching for characters > 127 is available only if UCP support is  
         available. */  
   
 #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)  
         if ((d > 255) || (utf && ((options & PCRE_CASELESS) != 0 && d > 127)))  
 #elif defined  SUPPORT_UTF  
         if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))  
 #elif !(defined COMPILE_PCRE8)  
         if (d > 255)  
 #endif  
 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)  
           {  
           xclass = TRUE;  
   
           /* With UCP support, we can find the other case equivalents of  
           the relevant characters. There may be several ranges. Optimize how  
           they fit with the basic range. */  
   
 #ifdef SUPPORT_UCP  
 #ifndef COMPILE_PCRE8  
           if (utf && (options & PCRE_CASELESS) != 0)  
 #else  
           if ((options & PCRE_CASELESS) != 0)  
 #endif  
             {  
             unsigned int occ, ocd;  
             unsigned int cc = c;  
             unsigned int origd = d;  
             while (get_othercase_range(&cc, origd, &occ, &ocd))  
               {  
               if (occ >= (unsigned int)c &&  
                   ocd <= (unsigned int)d)  
                 continue;                          /* Skip embedded ranges */  
   
               if (occ < (unsigned int)c  &&  
                   ocd >= (unsigned int)c - 1)      /* Extend the basic range */  
                 {                                  /* if there is overlap,   */  
                 c = occ;                           /* noting that if occ < c */  
                 continue;                          /* we can't have ocd > d  */  
                 }                                  /* because a subrange is  */  
               if (ocd > (unsigned int)d &&  
                   occ <= (unsigned int)d + 1)      /* always shorter than    */  
                 {                                  /* the basic range.       */  
                 d = ocd;  
                 continue;  
                 }  
   
               if (occ == ocd)  
                 {  
                 *class_uchardata++ = XCL_SINGLE;  
                 }  
               else  
                 {  
                 *class_uchardata++ = XCL_RANGE;  
                 class_uchardata += PRIV(ord2utf)(occ, class_uchardata);  
                 }  
               class_uchardata += PRIV(ord2utf)(ocd, class_uchardata);  
               }  
             }  
 #endif  /* SUPPORT_UCP */  
   
           /* Now record the original range, possibly modified for UCP caseless  
           overlapping ranges. */  
   
           *class_uchardata++ = XCL_RANGE;  
 #ifdef SUPPORT_UTF  
 #ifndef COMPILE_PCRE8  
           if (utf)  
             {  
             class_uchardata += PRIV(ord2utf)(c, class_uchardata);  
             class_uchardata += PRIV(ord2utf)(d, class_uchardata);  
             }  
           else  
             {  
             *class_uchardata++ = c;  
             *class_uchardata++ = d;  
             }  
 #else  
           class_uchardata += PRIV(ord2utf)(c, class_uchardata);  
           class_uchardata += PRIV(ord2utf)(d, class_uchardata);  
 #endif  
 #else /* SUPPORT_UTF */  
           *class_uchardata++ = c;  
           *class_uchardata++ = d;  
 #endif /* SUPPORT_UTF */  
   
           /* With UCP support, we are done. Without UCP support, there is no  
           caseless matching for UTF characters > 127; we can use the bit map  
           for the smaller ones. As for 16 bit characters without UTF, we  
           can still use  */  
   
 #ifdef SUPPORT_UCP  
 #ifndef COMPILE_PCRE8  
           if (utf)  
 #endif  
             continue;    /* With next character in the class */  
 #endif  /* SUPPORT_UCP */  
   
 #if defined SUPPORT_UTF && !defined(SUPPORT_UCP) && !(defined COMPILE_PCRE8)  
           if (utf)  
             {  
             if ((options & PCRE_CASELESS) == 0 || c > 127) continue;  
             /* Adjust upper limit and fall through to set up the map */  
             d = 127;  
             }  
           else  
             {  
             if (c > 255) continue;  
             /* Adjust upper limit and fall through to set up the map */  
             d = 255;  
             }  
 #elif defined SUPPORT_UTF && !defined(SUPPORT_UCP)  
           if ((options & PCRE_CASELESS) == 0 || c > 127) continue;  
           /* Adjust upper limit and fall through to set up the map */  
           d = 127;  
 #else  
           if (c > 255) continue;  
           /* Adjust upper limit and fall through to set up the map */  
           d = 255;  
 #endif  /* SUPPORT_UTF && !SUPPORT_UCP && !COMPILE_PCRE8 */  
           }  
 #endif  /* SUPPORT_UTF || !COMPILE_PCRE8 */  
   
         /* We use the bit map for 8 bit mode, or when the characters fall  
         partially or entirely to [0-255] ([0-127] for UCP) ranges. */  
   
         class_has_8bitchar = 1;  
   
         /* We can save a bit of time by skipping this in the pre-compile. */  
   
         if (lengthptr == NULL) for (; c <= d; c++)  
           {  
           classbits[c/8] |= (1 << (c&7));  
           if ((options & PCRE_CASELESS) != 0)  
             {  
             int uc = cd->fcc[c]; /* flip case */  
             classbits[uc/8] |= (1 << (uc&7));  
             }  
           }  
   
4495          continue;   /* Go get the next char in the class */          continue;   /* Go get the next char in the class */
4496          }          }
4497    
4498        /* Handle a lone single character - we can get here for a normal        /* Handle a single character - we can get here for a normal non-escape
4499        non-escape char, or after \ that introduces a single character or for an        char, or after \ that introduces a single character or for an apparent
4500        apparent range that isn't. */        range that isn't. Only the value 1 matters for class_one_char, so don't
4501          increase it if it is already 2 or more ... just in case there's a class
4502        LONE_SINGLE_CHARACTER:        with a zillion characters in it. */
4503    
4504          CLASS_SINGLE_CHARACTER:
4505          if (class_one_char < 2) class_one_char++;
4506    
4507          /* If class_one_char is 1, we have the first single character in the
4508          class, and there have been no prior ranges, or XCLASS items generated by
4509          escapes. If this is the final character in the class, we can optimize by
4510          turning the item into a 1-character OP_CHAR[I] if it's positive, or
4511          OP_NOT[I] if it's negative. In the positive case, it can cause firstchar
4512          to be set. Otherwise, there can be no first char if this item is first,
4513          whatever repeat count may follow. In the case of reqchar, save the
4514          previous value for reinstating. */
4515    
4516        /* Only the value of 1 matters for class_single_char. */        if (class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
   
       if (class_single_char < 2) class_single_char++;  
   
       /* If class_charcount is 1, we saw precisely one character. As long as  
       there was no use of \p or \P, in other words, no use of any XCLASS  
       features, we can optimize.  
   
       The optimization throws away the bit map. We turn the item into a  
       1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.  
       In the positive case, it can cause firstchar to be set. Otherwise, there  
       can be no first char if this item is first, whatever repeat count may  
       follow. In the case of reqchar, save the previous value for reinstating. */  
   
       if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)  
4517          {          {
4518          ptr++;          ptr++;
4519          zeroreqchar = reqchar;          zeroreqchar = reqchar;
4520    
4521          if (negate_class)          if (negate_class)
4522            {            {
4523    #ifdef SUPPORT_UCP
4524                // FIXMEchpe pcreuint32?
4525              int d;
4526    #endif
4527            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
4528            zerofirstchar = firstchar;            zerofirstchar = firstchar;
4529            *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;  
4530  #ifdef SUPPORT_UTF            /* For caseless UTF-8 mode when UCP support is available, check
4531            if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)            whether this character has more than one other case. If so, generate
4532              code += PRIV(ord2utf)(c, code);            a special OP_NOTPROP item instead of OP_NOTI. */
4533            else  
4534    #ifdef SUPPORT_UCP
4535              if (utf && (options & PCRE_CASELESS) != 0 &&
4536                  (d = UCD_CASESET(c)) != 0)
4537                {
4538                *code++ = OP_NOTPROP;
4539                *code++ = PT_CLIST;
4540                *code++ = d;
4541                }
4542              else
4543  #endif  #endif
4544              *code++ = c;            /* Char has only one other case, or UCP not available */
4545            goto NOT_CHAR;  
4546                {
4547                *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
4548    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4549                if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
4550                  code += PRIV(ord2utf)(c, code);
4551                else
4552    #endif
4553                  *code++ = c;
4554                }
4555    
4556              /* We are finished with this character class */
4557    
4558              goto END_CLASS;
4559            }            }
4560    
4561          /* For a single, positive character, get the value into mcbuffer, and          /* For a single, positive character, get the value into mcbuffer, and
4562          then we can handle this with the normal one-character code. */          then we can handle this with the normal one-character code. */
4563    
4564  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4565          if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)          if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
4566            mclength = PRIV(ord2utf)(c, mcbuffer);            mclength = PRIV(ord2utf)(c, mcbuffer);
4567          else          else
# Line 4577  for (;; ptr++) Line 4572  for (;; ptr++)
4572            }            }
4573          goto ONE_CHAR;          goto ONE_CHAR;
4574          }       /* End of 1-char optimization */          }       /* End of 1-char optimization */
4575    
4576        /* Handle a character that cannot go in the bit map. */        /* There is more than one character in the class, or an XCLASS item
4577          has been generated. Add this character to the class. */
4578  #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)  
4579        if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))        class_has_8bitchar +=
4580  #elif defined SUPPORT_UTF          add_to_class(classbits, &class_uchardata, options, cd, c, c);
       if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))  
 #elif !(defined COMPILE_PCRE8)  
       if (c > 255)  
 #endif  
   
 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)  
         {  
         xclass = TRUE;  
         *class_uchardata++ = XCL_SINGLE;  
 #ifdef SUPPORT_UTF  
 #ifndef COMPILE_PCRE8  
         /* In non 8 bit mode, we can get here even if we are not in UTF mode. */  
         if (!utf)  
           *class_uchardata++ = c;  
         else  
 #endif  
           class_uchardata += PRIV(ord2utf)(c, class_uchardata);  
 #else /* SUPPORT_UTF */  
         *class_uchardata++ = c;  
 #endif /* SUPPORT_UTF */  
   
 #ifdef SUPPORT_UCP  
 #ifdef COMPILE_PCRE8  
         if ((options & PCRE_CASELESS) != 0)  
 #else  
         /* In non 8 bit mode, we can get here even if we are not in UTF mode. */  
         if (utf && (options & PCRE_CASELESS) != 0)  
 #endif  
           {  
           unsigned int othercase;  
           if ((int)(othercase = UCD_OTHERCASE(c)) != c)  
             {  
             *class_uchardata++ = XCL_SINGLE;  
             class_uchardata += PRIV(ord2utf)(othercase, class_uchardata);  
             }  
           }  
 #endif  /* SUPPORT_UCP */  
   
         }  
       else  
 #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */  
   
       /* Handle a single-byte character */  
         {  
         class_has_8bitchar = 1;  
         classbits[c/8] |= (1 << (c&7));  
         if ((options & PCRE_CASELESS) != 0)  
           {  
           c = cd->fcc[c]; /* flip case */  
           classbits[c/8] |= (1 << (c&7));  
           }  
         }  
4581        }        }
4582    
4583      /* Loop until ']' reached. This "while" is the end of the "do" far above.      /* Loop until ']' reached. This "while" is the end of the "do" far above.
# Line 4654  for (;; ptr++) Line 4597  for (;; ptr++)
4597        goto FAILED;        goto FAILED;
4598        }        }
4599    
4600        /* We will need an XCLASS if data has been placed in class_uchardata. In
4601        the second phase this is a sufficient test. However, in the pre-compile
4602        phase, class_uchardata gets emptied to prevent workspace overflow, so
4603        only if the very last character in the class needs XCLASS will it contain
4604        anything at this point. For this reason, xclass gets set TRUE above when
4605        class_uchardata is emptied, and that's why this code is the way it is here
4606        instead of just doing a test on class_uchardata below. */
4607    
4608    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4609        if (class_uchardata > class_uchardata_base) xclass = TRUE;
4610    #endif
4611    
4612      /* If this is the first thing in the branch, there can be no first char      /* If this is the first thing in the branch, there can be no first char
4613      setting, whatever the repeat count. Any reqchar setting must remain      setting, whatever the repeat count. Any reqchar setting must remain
4614      unchanged after any kind of repeat. */      unchanged after any kind of repeat. */
# Line 4716  for (;; ptr++) Line 4671  for (;; ptr++)
4671        memcpy(code, classbits, 32);        memcpy(code, classbits, 32);
4672        }        }
4673      code += 32 / sizeof(pcre_uchar);      code += 32 / sizeof(pcre_uchar);
4674      NOT_CHAR:  
4675        END_CLASS:
4676      break;      break;
4677    
4678    
# Line 4840  for (;; ptr++) Line 4796  for (;; ptr++)
4796        hold the length of the character in bytes, plus UTF_LENGTH to flag that        hold the length of the character in bytes, plus UTF_LENGTH to flag that
4797        it's a length rather than a small character. */        it's a length rather than a small character. */
4798    
4799  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4800        if (utf && NOT_FIRSTCHAR(code[-1]))        if (utf && NOT_FIRSTCHAR(code[-1]))
4801          {          {
4802          pcre_uchar *lastchar = code - 1;          pcre_uchar *lastchar = code - 1;
# Line 4976  for (;; ptr++) Line 4932  for (;; ptr++)
4932    
4933          if (repeat_max < 0)          if (repeat_max < 0)
4934            {            {
4935  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4936            if (utf && (c & UTF_LENGTH) != 0)            if (utf && (c & UTF_LENGTH) != 0)
4937              {              {
4938              memcpy(code, utf_chars, IN_UCHARS(c & 7));              memcpy(code, utf_chars, IN_UCHARS(c & 7));
# Line 5001  for (;; ptr++) Line 4957  for (;; ptr++)
4957    
4958          else if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
4959            {            {
4960  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4961            if (utf && (c & UTF_LENGTH) != 0)            if (utf && (c & UTF_LENGTH) != 0)
4962              {              {
4963              memcpy(code, utf_chars, IN_UCHARS(c & 7));              memcpy(code, utf_chars, IN_UCHARS(c & 7));
# Line 5031  for (;; ptr++) Line 4987  for (;; ptr++)
4987    
4988        /* The character or character type itself comes last in all cases. */        /* The character or character type itself comes last in all cases. */
4989    
4990  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4991        if (utf && (c & UTF_LENGTH) != 0)        if (utf && (c & UTF_LENGTH) != 0)
4992          {          {
4993          memcpy(code, utf_chars, IN_UCHARS(c & 7));          memcpy(code, utf_chars, IN_UCHARS(c & 7));
# Line 5518  for (;; ptr++) Line 5474  for (;; ptr++)
5474        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
5475          {          {
5476          tempcode += PRIV(OP_lengths)[*tempcode];          tempcode += PRIV(OP_lengths)[*tempcode];
5477  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
5478          if (utf && HAS_EXTRALEN(tempcode[-1]))          if (utf && HAS_EXTRALEN(tempcode[-1]))
5479            tempcode += GET_EXTRALEN(tempcode[-1]);            tempcode += GET_EXTRALEN(tempcode[-1]);
5480  #endif  #endif
# Line 5616  for (;; ptr++) Line 5572  for (;; ptr++)
5572          arg = ++ptr;          arg = ++ptr;
5573          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
5574          arglen = (int)(ptr - arg);          arglen = (int)(ptr - arg);
5575          if (arglen > (int)MAX_MARK)          if ((unsigned int)arglen > MAX_MARK)
5576            {            {
5577            *errorcodeptr = ERR75;            *errorcodeptr = ERR75;
5578            goto FAILED;            goto FAILED;
# Line 6724  for (;; ptr++) Line 6680  for (;; ptr++)
6680    
6681      case CHAR_BACKSLASH:      case CHAR_BACKSLASH:
6682      tempptr = ptr;      tempptr = ptr;
6683      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);      escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);
6684    
6685      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
6686    
6687      if (c < 0)      if (escape == 0)
6688          c = ec;
6689        else
6690        {        {
6691        if (-c == ESC_Q)            /* Handle start of quoted string */        if (escape == ESC_Q)            /* Handle start of quoted string */
6692          {          {
6693          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
6694            ptr += 2;               /* avoid empty string */            ptr += 2;               /* avoid empty string */
# Line 6737  for (;; ptr++) Line 6696  for (;; ptr++)
6696          continue;          continue;
6697          }          }
6698    
6699        if (-c == ESC_E) continue;  /* Perl ignores an orphan \E */        if (escape == ESC_E) continue;  /* Perl ignores an orphan \E */
6700    
6701        /* For metasequences that actually match a character, we disable the        /* For metasequences that actually match a character, we disable the
6702        setting of a first character if it hasn't already been set. */        setting of a first character if it hasn't already been set. */
6703    
6704        if (firstchar == REQ_UNSET && -c > ESC_b && -c < ESC_Z)        if (firstchar == REQ_UNSET && escape > ESC_b && escape < ESC_Z)
6705          firstchar = REQ_NONE;          firstchar = REQ_NONE;
6706    
6707        /* Set values to reset to if this is followed by a zero repeat. */        /* Set values to reset to if this is followed by a zero repeat. */
# Line 6752  for (;; ptr++) Line 6711  for (;; ptr++)
6711    
6712        /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'        /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
6713        is a subroutine call by number (Oniguruma syntax). In fact, the value        is a subroutine call by number (Oniguruma syntax). In fact, the value
6714        -ESC_g is returned only for these cases. So we don't need to check for <        ESC_g is returned only for these cases. So we don't need to check for <
6715        or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is        or ' if the value is ESC_g. For the Perl syntax \g{n} the value is
6716        -ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as        ESC_REF+n, and for the Perl syntax \g{name} the result is ESC_k (as
6717        that is a synonym for a named back reference). */        that is a synonym for a named back reference). */
6718    
6719        if (-c == ESC_g)        if (escape == ESC_g)
6720          {          {
6721          const pcre_uchar *p;          const pcre_uchar *p;
6722          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
# Line 6813  for (;; ptr++) Line 6772  for (;; ptr++)
6772        /* \k<name> or \k'name' is a back reference by name (Perl syntax).        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
6773        We also support \k{name} (.NET syntax).  */        We also support \k{name} (.NET syntax).  */
6774    
6775        if (-c == ESC_k)        if (escape == ESC_k)
6776          {          {
6777          if ((ptr[1] != CHAR_LESS_THAN_SIGN &&          if ((ptr[1] != CHAR_LESS_THAN_SIGN &&
6778            ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))            ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
# Line 6832  for (;; ptr++) Line 6791  for (;; ptr++)
6791        not set to cope with cases like (?=(\w+))\1: which would otherwise set        not set to cope with cases like (?=(\w+))\1: which would otherwise set
6792        ':' later. */        ':' later. */
6793    
6794        if (-c >= ESC_REF)        if (escape >= ESC_REF)
6795          {          {
6796          open_capitem *oc;          open_capitem *oc;
6797          recno = -c - ESC_REF;          recno = escape - ESC_REF;
6798    
6799          HANDLE_REFERENCE:    /* Come here from named backref handling */          HANDLE_REFERENCE:    /* Come here from named backref handling */
6800          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
# Line 6862  for (;; ptr++) Line 6821  for (;; ptr++)
6821        /* So are Unicode property matches, if supported. */        /* So are Unicode property matches, if supported. */
6822    
6823  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6824        else if (-c == ESC_P || -c == ESC_p)        else if (escape == ESC_P || escape == ESC_p)
6825          {          {
6826          BOOL negated;          BOOL negated;
6827          int pdata;          int pdata;
6828          int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);          int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
6829          if (ptype < 0) goto FAILED;          if (ptype < 0) goto FAILED;
6830          previous = code;          previous = code;
6831          *code++ = ((-c == ESC_p) != negated)? OP_PROP : OP_NOTPROP;          *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
6832          *code++ = ptype;          *code++ = ptype;
6833          *code++ = pdata;          *code++ = pdata;
6834          }          }
# Line 6878  for (;; ptr++) Line 6837  for (;; ptr++)
6837        /* If Unicode properties are not supported, \X, \P, and \p are not        /* If Unicode properties are not supported, \X, \P, and \p are not
6838        allowed. */        allowed. */
6839    
6840        else if (-c == ESC_X || -c == ESC_P || -c == ESC_p)        else if (escape == ESC_X || escape == ESC_P || escape == ESC_p)
6841          {          {
6842          *errorcodeptr = ERR45;          *errorcodeptr = ERR45;
6843          goto FAILED;          goto FAILED;
# Line 6893  for (;; ptr++) Line 6852  for (;; ptr++)
6852    
6853        else        else
6854          {          {
6855          if ((-c == ESC_b || -c == ESC_B) && cd->max_lookbehind == 0)          if ((escape == ESC_b || escape == ESC_B) && cd->max_lookbehind == 0)
6856            cd->max_lookbehind = 1;            cd->max_lookbehind = 1;
6857  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6858          if (-c >= ESC_DU && -c <= ESC_wu)          if (escape >= ESC_DU && escape <= ESC_wu)
6859            {            {
6860            nestptr = ptr + 1;                   /* Where to resume */            nestptr = ptr + 1;                   /* Where to resume */
6861            ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */            ptr = substitutes[escape - ESC_DU] - 1;  /* Just before substitute */
6862            }            }
6863          else          else
6864  #endif  #endif
# Line 6907  for (;; ptr++) Line 6866  for (;; ptr++)
6866          so that it works in DFA mode and in lookbehinds. */          so that it works in DFA mode and in lookbehinds. */
6867    
6868            {            {
6869            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;            previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
6870            *code++ = (!utf && c == -ESC_C)? OP_ALLANY : -c;            *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
6871            }            }
6872          }          }
6873        continue;        continue;
# Line 6918  for (;; ptr++) Line 6877  for (;; ptr++)
6877      a value > 127. We set its representation in the length/buffer, and then      a value > 127. We set its representation in the length/buffer, and then
6878      handle it as a data character. */      handle it as a data character. */
6879    
6880  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
6881      if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)      if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
6882        mclength = PRIV(ord2utf)(c, mcbuffer);        mclength = PRIV(ord2utf)(c, mcbuffer);
6883      else      else
# Line 6941  for (;; ptr++) Line 6900  for (;; ptr++)
6900      mclength = 1;      mclength = 1;
6901      mcbuffer[0] = c;      mcbuffer[0] = c;
6902    
6903  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
6904      if (utf && HAS_EXTRALEN(c))      if (utf && HAS_EXTRALEN(c))
6905        ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));        ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
6906  #endif  #endif
# Line 6951  for (;; ptr++) Line 6910  for (;; ptr++)
6910    
6911      ONE_CHAR:      ONE_CHAR:
6912      previous = code;      previous = code;
6913    
6914        /* For caseless UTF-8 mode when UCP support is available, check whether
6915        this character has more than one other case. If so, generate a special
6916        OP_PROP item instead of OP_CHARI. */
6917    
6918    #ifdef SUPPORT_UCP
6919        if (utf && (options & PCRE_CASELESS) != 0)
6920          {
6921          GETCHAR(c, mcbuffer);
6922          if ((c = UCD_CASESET(c)) != 0)
6923            {
6924            *code++ = OP_PROP;
6925            *code++ = PT_CLIST;
6926            *code++ = c;
6927            if (firstchar == REQ_UNSET) firstchar = zerofirstchar = REQ_NONE;
6928            break;
6929            }
6930          }
6931    #endif
6932    
6933        /* Caseful matches, or not one of the multicase characters. */
6934    
6935      *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR;      *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR;
6936      for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];      for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
6937    
# Line 7650  Returns:        pointer to compiled data Line 7631  Returns:        pointer to compiled data
7631                  with errorptr and erroroffset set                  with errorptr and erroroffset set
7632  */  */
7633    
7634  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
7635  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7636  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
7637    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
7638  #else  #elif defined COMPILE_PCRE16
7639  PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
7640  pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,  pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
7641    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
7642    #elif defined COMPILE_PCRE32
7643    PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION
7644    pcre32_compile(PCRE_SPTR32 pattern, int options, const char **errorptr,
7645      int *erroroffset, const unsigned char *tables)
7646  #endif  #endif
7647  {  {
7648  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
7649  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7650  #else  #elif defined COMPILE_PCRE16
7651  return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);  return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7652    #elif defined COMPILE_PCRE32
7653    return pcre32_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7654  #endif  #endif
7655  }  }
7656    
7657    
7658  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
7659  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7660  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
7661    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
7662  #else  #elif defined COMPILE_PCRE16
7663  PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
7664  pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,  pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
7665    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
7666    #elif defined COMPILE_PCRE32
7667    PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION
7668    pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr,
7669      const char **errorptr, int *erroroffset, const unsigned char *tables)
7670  #endif  #endif
7671  {  {
7672  REAL_PCRE *re;  REAL_PCRE *re;
# Line 7761  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7752  while (ptr[skipatstart] == CHAR_LEFT_PAR
7752    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)
7753      { skipatstart += 8; options |= PCRE_UTF16; continue; }      { skipatstart += 8; options |= PCRE_UTF16; continue; }
7754  #endif  #endif
7755    #ifdef COMPILE_PCRE32
7756      if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)
7757        { skipatstart += 8; options |= PCRE_UTF32; continue; }
7758    #endif
7759    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
7760      { skipatstart += 6; options |= PCRE_UCP; continue; }      { skipatstart += 6; options |= PCRE_UCP; continue; }
7761    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
# Line 7789  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7784  while (ptr[skipatstart] == CHAR_LEFT_PAR
7784    else break;    else break;
7785    }    }
7786    
7787  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
7788  utf = (options & PCRE_UTF8) != 0;  utf = (options & PCRE_UTF8) != 0;
7789    
7790  /* Can't support UTF unless PCRE has been compiled to include the code. The  /* Can't support UTF unless PCRE has been compiled to include the code. The
# Line 7801  not used here. */ Line 7796  not used here. */
7796  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7797       (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)       (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
7798    {    {
7799  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
7800    errorcode = ERR44;    errorcode = ERR44;
7801  #else  #elif defined COMPILE_PCRE16
7802    errorcode = ERR74;    errorcode = ERR74;
7803    #elif defined COMPILE_PCRE32
7804      errorcode = ERR77;
7805  #endif  #endif
7806    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
7807    }    }
# Line 7968  re->name_count = cd->names_found; Line 7965  re->name_count = cd->names_found;
7965  re->ref_count = 0;  re->ref_count = 0;
7966  re->tables = (tables == PRIV(default_tables))? NULL : tables;  re->tables = (tables == PRIV(default_tables))? NULL : tables;
7967  re->nullpad = NULL;  re->nullpad = NULL;
7968    #ifdef COMPILE_PCRE32
7969    re->dummy1 = re->dummy2 = 0;
7970    #endif
7971    
7972  /* The starting points of the name/number translation table and of the code are  /* The starting points of the name/number translation table and of the code are
7973  passed around in the compile data block. The start/end pattern and initial  passed around in the compile data block. The start/end pattern and initial
# Line 8130  if ((re->options & PCRE_ANCHORED) == 0) Line 8130  if ((re->options & PCRE_ANCHORED) == 0)
8130        firstchar = find_firstassertedchar(codestart, FALSE);        firstchar = find_firstassertedchar(codestart, FALSE);
8131      if (firstchar >= 0)   /* Remove caseless flag for non-caseable chars */      if (firstchar >= 0)   /* Remove caseless flag for non-caseable chars */
8132        {        {
8133  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
8134        re->first_char = firstchar & 0xff;        re->first_char = firstchar & 0xff;
8135  #else  #elif defined COMPILE_PCRE16
 #ifdef COMPILE_PCRE16  
8136        re->first_char = firstchar & 0xffff;        re->first_char = firstchar & 0xffff;
8137  #endif  #elif defined COMPILE_PCRE32
8138          re->first_char = firstchar & ~REQ_MASK;
8139  #endif  #endif
8140        if ((firstchar & REQ_CASELESS) != 0)        if ((firstchar & REQ_CASELESS) != 0)
8141          {          {
# Line 8172  bytes. */ Line 8172  bytes. */
8172  if (reqchar >= 0 &&  if (reqchar >= 0 &&
8173       ((re->options & PCRE_ANCHORED) == 0 || (reqchar & REQ_VARY) != 0))       ((re->options & PCRE_ANCHORED) == 0 || (reqchar & REQ_VARY) != 0))
8174    {    {
8175  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
8176    re->req_char = reqchar & 0xff;    re->req_char = reqchar & 0xff;
8177  #else  #elif defined COMPILE_PCRE16
 #ifdef COMPILE_PCRE16  
8178    re->req_char = reqchar & 0xffff;    re->req_char = reqchar & 0xffff;
8179  #endif  #elif defined COMPILE_PCRE32
8180      re->req_char = reqchar & ~REQ_MASK;
8181  #endif  #endif
8182    if ((reqchar & REQ_CASELESS) != 0)    if ((reqchar & REQ_CASELESS) != 0)
8183      {      {
# Line 8229  if ((re->flags & PCRE_REQCHSET) != 0) Line 8229  if ((re->flags & PCRE_REQCHSET) != 0)
8229      else printf("Req char = \\x%02x%s\n", ch, caseless);      else printf("Req char = \\x%02x%s\n", ch, caseless);
8230    }    }
8231    
8232  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
8233  pcre_printint((pcre *)re, stdout, TRUE);  pcre_printint((pcre *)re, stdout, TRUE);
8234  #else  #elif defined COMPILE_PCRE16
8235  pcre16_printint((pcre *)re, stdout, TRUE);  pcre16_printint((pcre *)re, stdout, TRUE);
8236    #elif defined COMPILE_PCRE32
8237    pcre32_printint((pcre *)re, stdout, TRUE);
8238  #endif  #endif
8239    
8240  /* This check is done here in the debugging case so that the code that  /* This check is done here in the debugging case so that the code that
# Line 8248  if (code - codestart > length) Line 8250  if (code - codestart > length)
8250    }    }
8251  #endif   /* PCRE_DEBUG */  #endif   /* PCRE_DEBUG */
8252    
8253  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
8254  return (pcre *)re;  return (pcre *)re;
8255  #else  #elif defined COMPILE_PCRE16
8256  return (pcre16 *)re;  return (pcre16 *)re;
8257    #elif defined COMPILE_PCRE32
8258    return (pcre32 *)re;
8259  #endif  #endif
8260  }  }
8261    

Legend:
Removed from v.994  
changed lines
  Added in v.1060

  ViewVC Help
Powered by ViewVC 1.1.5