/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

code/trunk/pcre_compile.c revision 749 by ph10, Fri Nov 18 10:36:45 2011 UTC | code/branches/pcre16/pcre_compile.c revision 770 by zherczeg, Mon Nov 28 20:39:30 2011 UTC
# Line 231  static const char posix_names[] = Line 231  static const char posix_names[] =
231    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
232    STRING_word0  STRING_xdigit;    STRING_word0  STRING_xdigit;
233    
234  static const uschar posix_name_lengths[] = {  static const pcre_uint8 posix_name_lengths[] = {
235    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
236    
237  /* Table of class bit maps for each POSIX class. Each class is formed from a  /* Table of class bit maps for each POSIX class. Each class is formed from a
# Line 266  substitutes must be in the order of the Line 266  substitutes must be in the order of the
266  both positive and negative cases. NULL means no substitute. */  both positive and negative cases. NULL means no substitute. */
267    
268  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
269  static const uschar *substitutes[] = {  static const pcre_uchar string_PNd[]  = {
270    (uschar *)"\\P{Nd}",    /* \D */    CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
271    (uschar *)"\\p{Nd}",    /* \d */    CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
272    (uschar *)"\\P{Xsp}",   /* \S */       /* NOTE: Xsp is Perl space */  static const pcre_uchar string_pNd[]  = {
273    (uschar *)"\\p{Xsp}",   /* \s */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
274    (uschar *)"\\P{Xwd}",   /* \W */    CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
275    (uschar *)"\\p{Xwd}"    /* \w */  static const pcre_uchar string_PXsp[] = {
276      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
277      CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
278    static const pcre_uchar string_pXsp[] = {
279      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
280      CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
281    static const pcre_uchar string_PXwd[] = {
282      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
283      CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
284    static const pcre_uchar string_pXwd[] = {
285      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
286      CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
287    
288    static const pcre_uchar *substitutes[] = {
289      string_PNd,           /* \D */
290      string_pNd,           /* \d */
291      string_PXsp,          /* \S */       /* NOTE: Xsp is Perl space */
292      string_pXsp,          /* \s */
293      string_PXwd,          /* \W */
294      string_pXwd           /* \w */
295  };  };
296    
297  static const uschar *posix_substitutes[] = {  static const pcre_uchar string_pL[] =   {
298    (uschar *)"\\p{L}",     /* alpha */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
299    (uschar *)"\\p{Ll}",    /* lower */    CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
300    (uschar *)"\\p{Lu}",    /* upper */  static const pcre_uchar string_pLl[] =  {
301    (uschar *)"\\p{Xan}",   /* alnum */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
302    NULL,                   /* ascii */    CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
303    (uschar *)"\\h",        /* blank */  static const pcre_uchar string_pLu[] =  {
304    NULL,                   /* cntrl */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
305    (uschar *)"\\p{Nd}",    /* digit */    CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
306    NULL,                   /* graph */  static const pcre_uchar string_pXan[] = {
307    NULL,                   /* print */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
308    NULL,                   /* punct */    CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
309    (uschar *)"\\p{Xps}",   /* space */    /* NOTE: Xps is POSIX space */  static const pcre_uchar string_h[] =    {
310    (uschar *)"\\p{Xwd}",   /* word */    CHAR_BACKSLASH, CHAR_h, '\0' };
311    NULL,                   /* xdigit */  static const pcre_uchar string_pXps[] = {
312      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
313      CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
314    static const pcre_uchar string_PL[] =   {
315      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
316      CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
317    static const pcre_uchar string_PLl[] =  {
318      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
319      CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
320    static const pcre_uchar string_PLu[] =  {
321      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
322      CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
323    static const pcre_uchar string_PXan[] = {
324      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
325      CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
326    static const pcre_uchar string_H[] =    {
327      CHAR_BACKSLASH, CHAR_H, '\0' };
328    static const pcre_uchar string_PXps[] = {
329      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
330      CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
331    
332    static const pcre_uchar *posix_substitutes[] = {
333      string_pL,            /* alpha */
334      string_pLl,           /* lower */
335      string_pLu,           /* upper */
336      string_pXan,          /* alnum */
337      NULL,                 /* ascii */
338      string_h,             /* blank */
339      NULL,                 /* cntrl */
340      string_pNd,           /* digit */
341      NULL,                 /* graph */
342      NULL,                 /* print */
343      NULL,                 /* punct */
344      string_pXps,          /* space */    /* NOTE: Xps is POSIX space */
345      string_pXwd,          /* word */
346      NULL,                 /* xdigit */
347    /* Negated cases */    /* Negated cases */
348    (uschar *)"\\P{L}",     /* ^alpha */    string_PL,            /* ^alpha */
349    (uschar *)"\\P{Ll}",    /* ^lower */    string_PLl,           /* ^lower */
350    (uschar *)"\\P{Lu}",    /* ^upper */    string_PLu,           /* ^upper */
351    (uschar *)"\\P{Xan}",   /* ^alnum */    string_PXan,          /* ^alnum */
352    NULL,                   /* ^ascii */    NULL,                 /* ^ascii */
353    (uschar *)"\\H",        /* ^blank */    string_H,             /* ^blank */
354    NULL,                   /* ^cntrl */    NULL,                 /* ^cntrl */
355    (uschar *)"\\P{Nd}",    /* ^digit */    string_PNd,           /* ^digit */
356    NULL,                   /* ^graph */    NULL,                 /* ^graph */
357    NULL,                   /* ^print */    NULL,                 /* ^print */
358    NULL,                   /* ^punct */    NULL,                 /* ^punct */
359    (uschar *)"\\P{Xps}",   /* ^space */   /* NOTE: Xps is POSIX space */    string_PXps,          /* ^space */   /* NOTE: Xps is POSIX space */
360    (uschar *)"\\P{Xwd}",   /* ^word */    string_PXwd,          /* ^word */
361    NULL                    /* ^xdigit */    NULL                  /* ^xdigit */
362  };  };
363  #define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))  #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
364  #endif  #endif
365    
366  #define STRING(a)  # a  #define STRING(a)  # a
# Line 548  static const unsigned char ebcdic_charta Line 602  static const unsigned char ebcdic_charta
602  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
603    
604  static BOOL  static BOOL
605    compile_regex(int, uschar **, const uschar **, int *, BOOL, BOOL, int, int,    compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
606      int *, int *, branch_chain *, compile_data *, int *);      int *, int *, branch_chain *, compile_data *, int *);
607    
608    
# Line 595  Returns:    TRUE or FALSE Line 649  Returns:    TRUE or FALSE
649  */  */
650    
651  static BOOL  static BOOL
652  is_counted_repeat(const uschar *p)  is_counted_repeat(const pcre_uchar *p)
653  {  {
654  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
655  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
# Line 637  Returns:         zero or positive => a d Line 691  Returns:         zero or positive => a d
691  */  */
692    
693  static int  static int
694  check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,  check_escape(const pcre_uchar **ptrptr, int *errorcodeptr, int bracount,
695    int options, BOOL isclass)    int options, BOOL isclass)
696  {  {
697  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
698  const uschar *ptr = *ptrptr + 1;  const pcre_uchar *ptr = *ptrptr + 1;
699  int c, i;  int c, i;
700    
701  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */
# Line 668  else if ((i = escapes[c - 0x48]) != 0) Line 722  else if ((i = escapes[c - 0x48]) != 0)
722    
723  else  else
724    {    {
725    const uschar *oldptr;    const pcre_uchar *oldptr;
726    BOOL braced, negated;    BOOL braced, negated;
727    
728    switch (c)    switch (c)
# Line 741  else Line 795  else
795    
796      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
797        {        {
798        const uschar *p;        const pcre_uchar *p;
799        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
800          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
801        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
# Line 883  else Line 937  else
937    
938      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
939        {        {
940        const uschar *pt = ptr + 2;        const pcre_uchar *pt = ptr + 2;
941        int count = 0;        int count = 0;
942    
943        c = 0;        c = 0;
# Line 1014  Returns:         type value from ucp_typ Line 1068  Returns:         type value from ucp_typ
1068  */  */
1069    
1070  static int  static int
1071  get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
1072  {  {
1073  int c, i, bot, top;  int c, i, bot, top;
1074  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
1075  char name[32];  pcre_uchar name[32];
1076    
1077  c = *(++ptr);  c = *(++ptr);
1078  if (c == 0) goto ERROR_RETURN;  if (c == 0) goto ERROR_RETURN;
# Line 1059  else Line 1113  else
1113  /* Search for a recognized property name using binary chop */  /* Search for a recognized property name using binary chop */
1114    
1115  bot = 0;  bot = 0;
1116  top = _pcre_utt_size;  top = PRIV(utt_size);
1117    
1118  while (bot < top)  while (bot < top)
1119    {    {
1120    i = (bot + top) >> 1;    i = (bot + top) >> 1;
1121    c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);    c = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
1122    if (c == 0)    if (c == 0)
1123      {      {
1124      *dptr = _pcre_utt[i].value;      *dptr = PRIV(utt)[i].value;
1125      return _pcre_utt[i].type;      return PRIV(utt)[i].type;
1126      }      }
1127    if (c > 0) bot = i + 1; else top = i;    if (c > 0) bot = i + 1; else top = i;
1128    }    }
# Line 1106  Returns:         pointer to '}' on succe Line 1160  Returns:         pointer to '}' on succe
1160                   current ptr on error, with errorcodeptr set non-zero                   current ptr on error, with errorcodeptr set non-zero
1161  */  */
1162    
1163  static const uschar *  static const pcre_uchar *
1164  read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr)  read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr)
1165  {  {
1166  int min = 0;  int min = 0;
1167  int max = -1;  int max = -1;
# Line 1192  Returns:       the number of the named s Line 1246  Returns:       the number of the named s
1246  */  */
1247    
1248  static int  static int
1249  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn,
1250    BOOL xmode, BOOL utf8, int *count)    BOOL xmode, BOOL utf8, int *count)
1251  {  {
1252  uschar *ptr = *ptrptr;  pcre_uchar *ptr = *ptrptr;
1253  int start_count = *count;  int start_count = *count;
1254  int hwm_count = start_count;  int hwm_count = start_count;
1255  BOOL dup_parens = FALSE;  BOOL dup_parens = FALSE;
# Line 1262  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1316  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1316          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1317        {        {
1318        int term;        int term;
1319        const uschar *thisname;        const pcre_uchar *thisname;
1320        *count += 1;        *count += 1;
1321        if (name == NULL && *count == lorn) return *count;        if (name == NULL && *count == lorn) return *count;
1322        term = *ptr++;        term = *ptr++;
# Line 1270  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1324  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1324        thisname = ptr;        thisname = ptr;
1325        while (*ptr != term) ptr++;        while (*ptr != term) ptr++;
1326        if (name != NULL && lorn == ptr - thisname &&        if (name != NULL && lorn == ptr - thisname &&
1327            strncmp((const char *)name, (const char *)thisname, lorn) == 0)            STRNCMP_UC_UC(name, thisname, lorn) == 0)
1328          return *count;          return *count;
1329        term++;        term++;
1330        }        }
# Line 1313  for (; ptr < cd->end_pattern; ptr++) Line 1367  for (; ptr < cd->end_pattern; ptr++)
1367          {          {
1368          if (ptr[2] == CHAR_E)          if (ptr[2] == CHAR_E)
1369            ptr+= 2;            ptr+= 2;
1370          else if (strncmp((const char *)ptr+2,          else if (STRNCMP_UC_C8(ptr + 2,
1371                   STR_Q STR_BACKSLASH STR_E, 3) == 0)                   STR_Q STR_BACKSLASH STR_E, 3) == 0)
1372            ptr += 4;            ptr += 4;
1373          else          else
# Line 1425  Returns:       the number of the found s Line 1479  Returns:       the number of the found s
1479  */  */
1480    
1481  static int  static int
1482  find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode,  find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode,
1483    BOOL utf8)    BOOL utf8)
1484  {  {
1485  uschar *ptr = (uschar *)cd->start_pattern;  pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern;
1486  int count = 0;  int count = 0;
1487  int rc;  int rc;
1488    
# Line 1466  Arguments: Line 1520  Arguments:
1520  Returns:       pointer to the first significant opcode  Returns:       pointer to the first significant opcode
1521  */  */
1522    
1523  static const uschar*  static const pcre_uchar*
1524  first_significant_code(const uschar *code, BOOL skipassert)  first_significant_code(const pcre_uchar *code, BOOL skipassert)
1525  {  {
1526  for (;;)  for (;;)
1527    {    {
# Line 1478  for (;;) Line 1532  for (;;)
1532      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1533      if (!skipassert) return code;      if (!skipassert) return code;
1534      do code += GET(code, 1); while (*code == OP_ALT);      do code += GET(code, 1); while (*code == OP_ALT);
1535      code += _pcre_OP_lengths[*code];      code += PRIV(OP_lengths)[*code];
1536      break;      break;
1537    
1538      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
# Line 1492  for (;;) Line 1546  for (;;)
1546      case OP_RREF:      case OP_RREF:
1547      case OP_NRREF:      case OP_NRREF:
1548      case OP_DEF:      case OP_DEF:
1549      code += _pcre_OP_lengths[*code];      code += PRIV(OP_lengths)[*code];
1550      break;      break;
1551    
1552      default:      default:
# Line 1528  Arguments: Line 1582  Arguments:
1582    
1583  Returns:   the fixed length,  Returns:   the fixed length,
1584               or -1 if there is no fixed length,               or -1 if there is no fixed length,
1585               or -2 if \C was encountered               or -2 if \C was encountered (in UTF-8 mode only)
1586               or -3 if an OP_RECURSE item was encountered and atend is FALSE               or -3 if an OP_RECURSE item was encountered and atend is FALSE
1587               or -4 if an unknown opcode was encountered (internal error)               or -4 if an unknown opcode was encountered (internal error)
1588  */  */
1589    
1590  static int  static int
1591  find_fixedlength(uschar *code, BOOL utf8, BOOL atend, compile_data *cd)  find_fixedlength(pcre_uchar *code, BOOL utf8, BOOL atend, compile_data *cd)
1592  {  {
1593  int length = -1;  int length = -1;
1594    
1595  register int branchlength = 0;  register int branchlength = 0;
1596  register uschar *cc = code + 1 + LINK_SIZE;  register pcre_uchar *cc = code + 1 + LINK_SIZE;
1597    
1598  /* Scan along the opcodes for this branch. If we get to the end of the  /* Scan along the opcodes for this branch. If we get to the end of the
1599  branch, check the length against that of the other branches. */  branch, check the length against that of the other branches. */
# Line 1547  branch, check the length against that of Line 1601  branch, check the length against that of
1601  for (;;)  for (;;)
1602    {    {
1603    int d;    int d;
1604    uschar *ce, *cs;    pcre_uchar *ce, *cs;
1605    register int op = *cc;    register int op = *cc;
1606    switch (op)    switch (op)
1607      {      {
# Line 1561  for (;;) Line 1615  for (;;)
1615      case OP_ONCE:      case OP_ONCE:
1616      case OP_ONCE_NC:      case OP_ONCE_NC:
1617      case OP_COND:      case OP_COND:
1618      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf8, atend, cd);
1619      if (d < 0) return d;      if (d < 0) return d;
1620      branchlength += d;      branchlength += d;
1621      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 1592  for (;;) Line 1646  for (;;)
1646    
1647      case OP_RECURSE:      case OP_RECURSE:
1648      if (!atend) return -3;      if (!atend) return -3;
1649      cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1650      do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
1651      if (cc > cs && cc < ce) return -1;                /* Recursion */      if (cc > cs && cc < ce) return -1;                    /* Recursion */
1652      d = find_fixedlength(cs + 2, utf8, atend, cd);      d = find_fixedlength(cs + 2, utf8, atend, cd);
1653      if (d < 0) return d;      if (d < 0) return d;
1654      branchlength += d;      branchlength += d;
# Line 1616  for (;;) Line 1670  for (;;)
1670      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
1671      case OP_SKIP_ARG:      case OP_SKIP_ARG:
1672      case OP_THEN_ARG:      case OP_THEN_ARG:
1673      cc += cc[1] + _pcre_OP_lengths[*cc];      cc += cc[1] + PRIV(OP_lengths)[*cc];
1674      break;      break;
1675    
1676      case OP_CALLOUT:      case OP_CALLOUT:
# Line 1643  for (;;) Line 1697  for (;;)
1697      case OP_SOM:      case OP_SOM:
1698      case OP_THEN:      case OP_THEN:
1699      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
1700      cc += _pcre_OP_lengths[*cc];      cc += PRIV(OP_lengths)[*cc];
1701      break;      break;
1702    
1703      /* Handle literal characters */      /* Handle literal characters */
# Line 1655  for (;;) Line 1709  for (;;)
1709      branchlength++;      branchlength++;
1710      cc += 2;      cc += 2;
1711  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1712      if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
1713  #endif  #endif
1714      break;      break;
1715    
# Line 1667  for (;;) Line 1721  for (;;)
1721      case OP_NOTEXACT:      case OP_NOTEXACT:
1722      case OP_NOTEXACTI:      case OP_NOTEXACTI:
1723      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1724      cc += 4;      cc += 2 + IMM2_SIZE;
1725  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1726      if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
1727  #endif  #endif
1728      break;      break;
1729    
1730      case OP_TYPEEXACT:      case OP_TYPEEXACT:
1731      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1732      if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;      if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
1733      cc += 4;      cc += 1 + IMM2_SIZE + 1;
1734      break;      break;
1735    
1736      /* Handle single-char matchers */      /* Handle single-char matchers */
# Line 1702  for (;;) Line 1756  for (;;)
1756      cc++;      cc++;
1757      break;      break;
1758    
1759      /* The single-byte matcher isn't allowed */      /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
1760        otherwise \C is coded as OP_ALLANY. */
1761    
1762      case OP_ANYBYTE:      case OP_ANYBYTE:
1763      return -2;      return -2;
1764    
1765      /* Check a class for variable quantification */      /* Check a class for variable quantification */
1766    
1767  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1768      case OP_XCLASS:      case OP_XCLASS:
1769      cc += GET(cc, 1) - 33;      cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
1770      /* Fall through */      /* Fall through */
1771  #endif  #endif
1772    
1773      case OP_CLASS:      case OP_CLASS:
1774      case OP_NCLASS:      case OP_NCLASS:
1775      cc += 33;      cc += PRIV(OP_lengths)[OP_CLASS];
1776    
1777      switch (*cc)      switch (*cc)
1778        {        {
# Line 1731  for (;;) Line 1786  for (;;)
1786    
1787        case OP_CRRANGE:        case OP_CRRANGE:
1788        case OP_CRMINRANGE:        case OP_CRMINRANGE:
1789        if (GET2(cc,1) != GET2(cc,3)) return -1;        if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;
1790        branchlength += GET2(cc,1);        branchlength += GET2(cc,1);
1791        cc += 5;        cc += 1 + 2 * IMM2_SIZE;
1792        break;        break;
1793    
1794        default:        default:
# Line 1854  Arguments: Line 1909  Arguments:
1909  Returns:      pointer to the opcode for the bracket, or NULL if not found  Returns:      pointer to the opcode for the bracket, or NULL if not found
1910  */  */
1911    
1912  const uschar *  const pcre_uchar *
1913  _pcre_find_bracket(const uschar *code, BOOL utf8, int number)  PRIV(find_bracket)(const pcre_uchar *code, BOOL utf8, int number)
1914  {  {
1915  for (;;)  for (;;)
1916    {    {
# Line 1873  for (;;) Line 1928  for (;;)
1928    
1929    else if (c == OP_REVERSE)    else if (c == OP_REVERSE)
1930      {      {
1931      if (number < 0) return (uschar *)code;      if (number < 0) return (pcre_uchar *)code;
1932      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
1933      }      }
1934    
1935    /* Handle capturing bracket */    /* Handle capturing bracket */
# Line 1883  for (;;) Line 1938  for (;;)
1938             c == OP_CBRAPOS || c == OP_SCBRAPOS)             c == OP_CBRAPOS || c == OP_SCBRAPOS)
1939      {      {
1940      int n = GET2(code, 1+LINK_SIZE);      int n = GET2(code, 1+LINK_SIZE);
1941      if (n == number) return (uschar *)code;      if (n == number) return (pcre_uchar *)code;
1942      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
1943      }      }
1944    
1945    /* Otherwise, we can get the item's length from the table, except that for    /* Otherwise, we can get the item's length from the table, except that for
# Line 1912  for (;;) Line 1967  for (;;)
1967        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1968        case OP_TYPEEXACT:        case OP_TYPEEXACT:
1969        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1970        if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;        if (code[1 + IMM2_SIZE] == OP_PROP
1971            || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;
1972        break;        break;
1973    
1974        case OP_MARK:        case OP_MARK:
# Line 1928  for (;;) Line 1984  for (;;)
1984    
1985      /* Add in the fixed length from the table */      /* Add in the fixed length from the table */
1986    
1987      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
1988    
1989    /* In UTF-8 mode, opcodes that are followed by a character may be followed by    /* In UTF-8 mode, opcodes that are followed by a character may be followed by
1990    a multi-byte character. The length in the table is a minimum, so we have to    a multi-byte character. The length in the table is a minimum, so we have to
# Line 1965  for (;;) Line 2021  for (;;)
2021        case OP_MINQUERYI:        case OP_MINQUERYI:
2022        case OP_POSQUERY:        case OP_POSQUERY:
2023        case OP_POSQUERYI:        case OP_POSQUERYI:
2024        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += PRIV(utf8_table4)[code[-1] & 0x3f];
2025        break;        break;
2026        }        }
2027  #else  #else
# Line 1991  Arguments: Line 2047  Arguments:
2047  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found
2048  */  */
2049    
2050  static const uschar *  static const pcre_uchar *
2051  find_recurse(const uschar *code, BOOL utf8)  find_recurse(const pcre_uchar *code, BOOL utf8)
2052  {  {
2053  for (;;)  for (;;)
2054    {    {
# Line 2031  for (;;) Line 2087  for (;;)
2087        case OP_TYPEUPTO:        case OP_TYPEUPTO:
2088        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
2089        case OP_TYPEEXACT:        case OP_TYPEEXACT:
2090        if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;        if (code[1 + IMM2_SIZE] == OP_PROP
2091            || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;
2092        break;        break;
2093    
2094        case OP_MARK:        case OP_MARK:
# Line 2047  for (;;) Line 2104  for (;;)
2104    
2105      /* Add in the fixed length from the table */      /* Add in the fixed length from the table */
2106    
2107      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
2108    
2109      /* In UTF-8 mode, opcodes that are followed by a character may be followed      /* In UTF-8 mode, opcodes that are followed by a character may be followed
2110      by a multi-byte character. The length in the table is a minimum, so we have      by a multi-byte character. The length in the table is a minimum, so we have
# Line 2084  for (;;) Line 2141  for (;;)
2141        case OP_MINQUERYI:        case OP_MINQUERYI:
2142        case OP_POSQUERY:        case OP_POSQUERY:
2143        case OP_POSQUERYI:        case OP_POSQUERYI:
2144        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += PRIV(utf8_table4)[code[-1] & 0x3f];
2145        break;        break;
2146        }        }
2147  #else  #else
# Line 2118  Returns:      TRUE if what is matched co Line 2175  Returns:      TRUE if what is matched co
2175  */  */
2176    
2177  static BOOL  static BOOL
2178  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2179    compile_data *cd)    BOOL utf8, compile_data *cd)
2180  {  {
2181  register int c;  register int c;
2182  for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
2183       code < endcode;       code < endcode;
2184       code = first_significant_code(code + _pcre_OP_lengths[c], TRUE))       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
2185    {    {
2186    const uschar *ccode;    const pcre_uchar *ccode;
2187    
2188    c = *code;    c = *code;
2189    
# Line 2149  for (code = first_significant_code(code Line 2206  for (code = first_significant_code(code
2206    
2207    if (c == OP_RECURSE)    if (c == OP_RECURSE)
2208      {      {
2209      const uschar *scode;      const pcre_uchar *scode;
2210      BOOL empty_branch;      BOOL empty_branch;
2211    
2212      /* Test for forward reference */      /* Test for forward reference */
# Line 2185  for (code = first_significant_code(code Line 2242  for (code = first_significant_code(code
2242    if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||    if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||
2243        c == OP_BRAPOSZERO)        c == OP_BRAPOSZERO)
2244      {      {
2245      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
2246      do code += GET(code, 1); while (*code == OP_ALT);      do code += GET(code, 1); while (*code == OP_ALT);
2247      c = *code;      c = *code;
2248      continue;      continue;
# Line 2241  for (code = first_significant_code(code Line 2298  for (code = first_significant_code(code
2298      {      {
2299      /* Check for quantifiers after a class. XCLASS is used for classes that      /* Check for quantifiers after a class. XCLASS is used for classes that
2300      cannot be represented just by a bit map. This includes negated single      cannot be represented just by a bit map. This includes negated single
2301      high-valued characters. The length in _pcre_OP_lengths[] is zero; the      high-valued characters. The length in PRIV(OP_lengths)[] is zero; the
2302      actual length is stored in the compiled code, so we must update "code"      actual length is stored in the compiled code, so we must update "code"
2303      here. */      here. */
2304    
# Line 2253  for (code = first_significant_code(code Line 2310  for (code = first_significant_code(code
2310    
2311      case OP_CLASS:      case OP_CLASS:
2312      case OP_NCLASS:      case OP_NCLASS:
2313      ccode = code + 33;      ccode = code + PRIV(OP_lengths)[OP_CLASS];
2314    
2315  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2316      CHECK_CLASS_REPEAT:      CHECK_CLASS_REPEAT:
# Line 2328  for (code = first_significant_code(code Line 2385  for (code = first_significant_code(code
2385      case OP_TYPEUPTO:      case OP_TYPEUPTO:
2386      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
2387      case OP_TYPEPOSUPTO:      case OP_TYPEPOSUPTO:
2388      if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;      if (code[1 + IMM2_SIZE] == OP_PROP
2389          || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;
2390      break;      break;
2391    
2392      /* End of branch */      /* End of branch */
# Line 2356  for (code = first_significant_code(code Line 2414  for (code = first_significant_code(code
2414      case OP_MINQUERYI:      case OP_MINQUERYI:
2415      case OP_POSQUERY:      case OP_POSQUERY:
2416      case OP_POSQUERYI:      case OP_POSQUERYI:
2417      if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];      if (utf8 && code[1] >= 0xc0) code += PRIV(utf8_table4)[code[1] & 0x3f];
2418      break;      break;
2419    
2420      case OP_UPTO:      case OP_UPTO:
# Line 2365  for (code = first_significant_code(code Line 2423  for (code = first_significant_code(code
2423      case OP_MINUPTOI:      case OP_MINUPTOI:
2424      case OP_POSUPTO:      case OP_POSUPTO:
2425      case OP_POSUPTOI:      case OP_POSUPTOI:
2426      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];      if (utf8 && code[1 + IMM2_SIZE] >= 0xc0) code += PRIV(utf8_table4)[code[1 + IMM2_SIZE] & 0x3f];
2427      break;      break;
2428  #endif  #endif
2429    
# Line 2416  Returns:      TRUE if what is matched co Line 2474  Returns:      TRUE if what is matched co
2474  */  */
2475    
2476  static BOOL  static BOOL
2477  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
2478    BOOL utf8, compile_data *cd)    branch_chain *bcptr, BOOL utf8, compile_data *cd)
2479  {  {
2480  while (bcptr != NULL && bcptr->current_branch >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2481    {    {
# Line 2473  Returns:   TRUE or FALSE Line 2531  Returns:   TRUE or FALSE
2531  */  */
2532    
2533  static BOOL  static BOOL
2534  check_posix_syntax(const uschar *ptr, const uschar **endptr)  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
2535  {  {
2536  int terminator;          /* Don't combine these lines; the Solaris cc */  int terminator;          /* Don't combine these lines; the Solaris cc */
2537  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
# Line 2517  Returns:     a value representing the na Line 2575  Returns:     a value representing the na
2575  */  */
2576    
2577  static int  static int
2578  check_posix_name(const uschar *ptr, int len)  check_posix_name(const pcre_uchar *ptr, int len)
2579  {  {
2580  const char *pn = posix_names;  const char *pn = posix_names;
2581  register int yield = 0;  register int yield = 0;
2582  while (posix_name_lengths[yield] != 0)  while (posix_name_lengths[yield] != 0)
2583    {    {
2584    if (len == posix_name_lengths[yield] &&    if (len == posix_name_lengths[yield] &&
2585      strncmp((const char *)ptr, pn, len) == 0) return yield;      STRNCMP_UC_C8(ptr, pn, len) == 0) return yield;
2586    pn += posix_name_lengths[yield] + 1;    pn += posix_name_lengths[yield] + 1;
2587    yield++;    yield++;
2588    }    }
# Line 2564  Returns:     nothing Line 2622  Returns:     nothing
2622  */  */
2623    
2624  static void  static void
2625  adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf8, compile_data *cd,
2626    uschar *save_hwm)    pcre_uchar *save_hwm)
2627  {  {
2628  uschar *ptr = group;  pcre_uchar *ptr = group;
2629    
2630  while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf8)) != NULL)
2631    {    {
2632    int offset;    int offset;
2633    uschar *hc;    pcre_uchar *hc;
2634    
2635    /* See if this recursion is on the forward reference list. If so, adjust the    /* See if this recursion is on the forward reference list. If so, adjust the
2636    reference. */    reference. */
# Line 2617  Arguments: Line 2675  Arguments:
2675  Returns:         new code pointer  Returns:         new code pointer
2676  */  */
2677    
2678  static uschar *  static pcre_uchar *
2679  auto_callout(uschar *code, const uschar *ptr, compile_data *cd)  auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd)
2680  {  {
2681  *code++ = OP_CALLOUT;  *code++ = OP_CALLOUT;
2682  *code++ = 255;  *code++ = 255;
2683  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */
2684  PUT(code, LINK_SIZE, 0);                       /* Default length */  PUT(code, LINK_SIZE, 0);                       /* Default length */
2685  return code + 2*LINK_SIZE;  return code + 2 * LINK_SIZE;
2686  }  }
2687    
2688    
# Line 2646  Returns:             nothing Line 2704  Returns:             nothing
2704  */  */
2705    
2706  static void  static void
2707  complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)  complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd)
2708  {  {
2709  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
2710  PUT(previous_callout, 2 + LINK_SIZE, length);  PUT(previous_callout, 2 + LINK_SIZE, length);
# Line 2729  switch(ptype) Line 2787  switch(ptype)
2787            prop->chartype == ucp_Lt) == negated;            prop->chartype == ucp_Lt) == negated;
2788    
2789    case PT_GC:    case PT_GC:
2790    return (pdata == _pcre_ucp_gentype[prop->chartype]) == negated;    return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
2791    
2792    case PT_PC:    case PT_PC:
2793    return (pdata == prop->chartype) == negated;    return (pdata == prop->chartype) == negated;
# Line 2740  switch(ptype) Line 2798  switch(ptype)
2798    /* These are specials */    /* These are specials */
2799    
2800    case PT_ALNUM:    case PT_ALNUM:
2801    return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2802            _pcre_ucp_gentype[prop->chartype] == ucp_N) == negated;            PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
2803    
2804    case PT_SPACE:    /* Perl space */    case PT_SPACE:    /* Perl space */
2805    return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2806            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2807            == negated;            == negated;
2808    
2809    case PT_PXSPACE:  /* POSIX space */    case PT_PXSPACE:  /* POSIX space */
2810    return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2811            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2812            c == CHAR_FF || c == CHAR_CR)            c == CHAR_FF || c == CHAR_CR)
2813            == negated;            == negated;
2814    
2815    case PT_WORD:    case PT_WORD:
2816    return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2817            _pcre_ucp_gentype[prop->chartype] == ucp_N ||            PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2818            c == CHAR_UNDERSCORE) == negated;            c == CHAR_UNDERSCORE) == negated;
2819    }    }
2820  return FALSE;  return FALSE;
# Line 2784  Returns:        TRUE if possessifying is Line 2842  Returns:        TRUE if possessifying is
2842  */  */
2843    
2844  static BOOL  static BOOL
2845  check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr,  check_auto_possessive(const pcre_uchar *previous, BOOL utf8,
2846    int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
2847  {  {
2848  int c, next;  int c, next;
2849  int op_code = *previous++;  int op_code = *previous++;
# Line 2860  if ((options & PCRE_EXTENDED) != 0) Line 2918  if ((options & PCRE_EXTENDED) != 0)
2918  /* If the next thing is itself optional, we have to give up. */  /* If the next thing is itself optional, we have to give up. */
2919    
2920  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2921    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)    STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2922      return FALSE;      return FALSE;
2923    
2924  /* Now compare the next item with the previous opcode. First, handle cases when  /* Now compare the next item with the previous opcode. First, handle cases when
# Line 3122  switch(op_code) Line 3180  switch(op_code)
3180        to the original \d etc. At this point, ptr will point to a zero byte. */        to the original \d etc. At this point, ptr will point to a zero byte. */
3181    
3182        if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||        if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
3183          strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)          STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
3184            return FALSE;            return FALSE;
3185    
3186        /* Do the property check. */        /* Do the property check. */
# Line 3213  Returns:         TRUE on success Line 3271  Returns:         TRUE on success
3271  */  */
3272    
3273  static BOOL  static BOOL
3274  compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,  compile_branch(int *optionsptr, pcre_uchar **codeptr,
3275    int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    const pcre_uchar **ptrptr, int *errorcodeptr, int *firstbyteptr,
3276    int cond_depth, compile_data *cd, int *lengthptr)    int *reqbyteptr, branch_chain *bcptr, int cond_depth, compile_data *cd,
3277      int *lengthptr)
3278  {  {
3279  int repeat_type, op_type;  int repeat_type, op_type;
3280  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
# Line 3228  int options = *optionsptr; Line 3287  int options = *optionsptr;
3287  int after_manual_callout = 0;  int after_manual_callout = 0;
3288  int length_prevgroup = 0;  int length_prevgroup = 0;
3289  register int c;  register int c;
3290  register uschar *code = *codeptr;  register pcre_uchar *code = *codeptr;
3291  uschar *last_code = code;  pcre_uchar *last_code = code;
3292  uschar *orig_code = code;  pcre_uchar *orig_code = code;
3293  uschar *tempcode;  pcre_uchar *tempcode;
3294  BOOL inescq = FALSE;  BOOL inescq = FALSE;
3295  BOOL groupsetfirstbyte = FALSE;  BOOL groupsetfirstbyte = FALSE;
3296  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
3297  const uschar *tempptr;  const pcre_uchar *tempptr;
3298  const uschar *nestptr = NULL;  const pcre_uchar *nestptr = NULL;
3299  uschar *previous = NULL;  pcre_uchar *previous = NULL;
3300  uschar *previous_callout = NULL;  pcre_uchar *previous_callout = NULL;
3301  uschar *save_hwm = NULL;  pcre_uchar *save_hwm = NULL;
3302  uschar classbits[32];  pcre_uint8 classbits[32];
3303    
3304  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
3305  must not do this for other options (e.g. PCRE_EXTENDED) because they may change  must not do this for other options (e.g. PCRE_EXTENDED) because they may change
3306  dynamically as we process the pattern. */  dynamically as we process the pattern. */
3307    
3308  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
 BOOL class_utf8;  
3309  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
3310  uschar *class_utf8data;  pcre_uint8 utf8_char[6];
 uschar *class_utf8data_base;  
 uschar utf8_char[6];  
3311  #else  #else
3312  BOOL utf8 = FALSE;  BOOL utf8 = FALSE;
3313  #endif  #endif
3314    
3315    /* Helper variables for OP_XCLASS opcode (for characters > 255). */
3316    
3317    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3318    BOOL xclass;
3319    pcre_uchar *class_uchardata;
3320    pcre_uchar *class_uchardata_base;
3321    #endif
3322    
3323  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
3324  if (lengthptr != NULL) DPRINTF((">> start branch\n"));  if (lengthptr != NULL) DPRINTF((">> start branch\n"));
3325  #endif  #endif
# Line 3305  for (;; ptr++) Line 3369  for (;; ptr++)
3369    int terminator;    int terminator;
3370    int mclength;    int mclength;
3371    int tempbracount;    int tempbracount;
3372    uschar mcbuffer[8];    pcre_uchar mcbuffer[8];
3373    
3374    /* Get next byte in the pattern */    /* Get next byte in the pattern */
3375    
# Line 3352  for (;; ptr++) Line 3416  for (;; ptr++)
3416        }        }
3417    
3418      *lengthptr += (int)(code - last_code);      *lengthptr += (int)(code - last_code);
3419      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code - last_code),
3420          c));
3421    
3422      /* If "previous" is set and it is not at the start of the work space, move      /* If "previous" is set and it is not at the start of the work space, move
3423      it back to there, in order to avoid filling up the work space. Otherwise,      it back to there, in order to avoid filling up the work space. Otherwise,
# Line 3362  for (;; ptr++) Line 3427  for (;; ptr++)
3427        {        {
3428        if (previous > orig_code)        if (previous > orig_code)
3429          {          {
3430          memmove(orig_code, previous, code - previous);          memmove(orig_code, previous, IN_UCHARS(code - previous));
3431          code -= previous - orig_code;          code -= previous - orig_code;
3432          previous = orig_code;          previous = orig_code;
3433          }          }
# Line 3560  for (;; ptr++) Line 3625  for (;; ptr++)
3625          {          {
3626          if (ptr[1] == CHAR_E)          if (ptr[1] == CHAR_E)
3627            ptr++;            ptr++;
3628          else if (strncmp((const char *)ptr+1,          else if (STRNCMP_UC_C8(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0)
                           STR_Q STR_BACKSLASH STR_E, 3) == 0)  
3629            ptr += 3;            ptr += 3;
3630          else          else
3631            break;            break;
# Line 3603  for (;; ptr++) Line 3667  for (;; ptr++)
3667      than 256), because in that case the compiled code doesn't use the bit map.      than 256), because in that case the compiled code doesn't use the bit map.
3668      */      */
3669    
3670      memset(classbits, 0, 32 * sizeof(uschar));      memset(classbits, 0, 32 * sizeof(pcre_uint8));
3671    
3672  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3673      class_utf8 = FALSE;                       /* No chars >= 256 */      xclass = FALSE;                           /* No chars >= 256 */
3674      class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */      class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
3675      class_utf8data_base = class_utf8data;     /* For resetting in pass 1 */      class_uchardata_base = class_uchardata;   /* For resetting in pass 1 */
3676  #endif  #endif
3677    
3678      /* Process characters until ] is reached. By writing this as a "do" it      /* Process characters until ] is reached. By writing this as a "do" it
# Line 3617  for (;; ptr++) Line 3681  for (;; ptr++)
3681    
3682      if (c != 0) do      if (c != 0) do
3683        {        {
3684        const uschar *oldptr;        const pcre_uchar *oldptr;
3685    
3686  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3687        if (utf8 && c > 127)        if (utf8 && c > 127)
3688          {                           /* Braces are required because the */          {                           /* Braces are required because the */
3689          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
3690          }          }
3691    #endif
3692    
3693        /* In the pre-compile phase, accumulate the length of any UTF-8 extra  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3694          /* In the pre-compile phase, accumulate the length of any extra
3695        data and reset the pointer. This is so that very large classes that        data and reset the pointer. This is so that very large classes that
3696        contain a zillion UTF-8 characters no longer overwrite the work space        contain a zillion > 255 characters no longer overwrite the work space
3697        (which is on the stack). */        (which is on the stack). */
3698    
3699        if (lengthptr != NULL)        if (lengthptr != NULL)
3700          {          {
3701          *lengthptr += class_utf8data - class_utf8data_base;          *lengthptr += class_uchardata - class_uchardata_base;
3702          class_utf8data = class_utf8data_base;          class_uchardata = class_uchardata_base;
3703          }          }
   
3704  #endif  #endif
3705    
3706        /* Inside \Q...\E everything is literal except \E */        /* Inside \Q...\E everything is literal except \E */
# Line 3663  for (;; ptr++) Line 3728  for (;; ptr++)
3728          {          {
3729          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3730          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3731          register const uschar *cbits = cd->cbits;          register const pcre_uint8 *cbits = cd->cbits;
3732          uschar pbits[32];          pcre_uint8 pbits[32];
3733    
3734          if (ptr[1] != CHAR_COLON)          if (ptr[1] != CHAR_COLON)
3735            {            {
# Line 3719  for (;; ptr++) Line 3784  for (;; ptr++)
3784          /* Copy in the first table (always present) */          /* Copy in the first table (always present) */
3785    
3786          memcpy(pbits, cbits + posix_class_maps[posix_class],          memcpy(pbits, cbits + posix_class_maps[posix_class],
3787            32 * sizeof(uschar));            32 * sizeof(pcre_uint8));
3788    
3789          /* If there is a second table, add or remove it as required. */          /* If there is a second table, add or remove it as required. */
3790    
# Line 3781  for (;; ptr++) Line 3846  for (;; ptr++)
3846    
3847          if (c < 0)          if (c < 0)
3848            {            {
3849            register const uschar *cbits = cd->cbits;            register const pcre_uint8 *cbits = cd->cbits;
3850            class_charcount += 2;     /* Greater than 1 is what matters */            class_charcount += 2;     /* Greater than 1 is what matters */
3851    
3852            switch (-c)            switch (-c)
# Line 3836  for (;; ptr++) Line 3901  for (;; ptr++)
3901              SETBIT(classbits, 0x09); /* VT */              SETBIT(classbits, 0x09); /* VT */
3902              SETBIT(classbits, 0x20); /* SPACE */              SETBIT(classbits, 0x20); /* SPACE */
3903              SETBIT(classbits, 0xa0); /* NSBP */              SETBIT(classbits, 0xa0); /* NSBP */
3904  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3905              if (utf8)              if (utf8)
3906                {                {
3907                class_utf8 = TRUE;                xclass = TRUE;
3908                *class_utf8data++ = XCL_SINGLE;                *class_uchardata++ = XCL_SINGLE;
3909                class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x1680, class_uchardata);
3910                *class_utf8data++ = XCL_SINGLE;                *class_uchardata++ = XCL_SINGLE;
3911                class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x180e, class_uchardata);
3912                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3913                class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2000, class_uchardata);
3914                class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x200A, class_uchardata);
3915                *class_utf8data++ = XCL_SINGLE;                *class_uchardata++ = XCL_SINGLE;
3916                class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x202f, class_uchardata);
3917                *class_utf8data++ = XCL_SINGLE;                *class_uchardata++ = XCL_SINGLE;
3918                class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x205f, class_uchardata);
3919                *class_utf8data++ = XCL_SINGLE;                *class_uchardata++ = XCL_SINGLE;
3920                class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x3000, class_uchardata);
3921                }                }
3922  #endif  #endif
3923              continue;              continue;
# Line 3871  for (;; ptr++) Line 3936  for (;; ptr++)
3936                classbits[c] |= x;                classbits[c] |= x;
3937                }                }
3938    
3939  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3940              if (utf8)              if (utf8)
3941                {                {
3942                class_utf8 = TRUE;                xclass = TRUE;
3943                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3944                class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x0100, class_uchardata);
3945                class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x167f, class_uchardata);
3946                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3947                class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x1681, class_uchardata);
3948                class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x180d, class_uchardata);
3949                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3950                class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x180f, class_uchardata);
3951                class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x1fff, class_uchardata);
3952                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3953                class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x200B, class_uchardata);
3954                class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x202e, class_uchardata);
3955                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3956                class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2030, class_uchardata);
3957                class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x205e, class_uchardata);
3958                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3959                class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2060, class_uchardata);
3960                class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2fff, class_uchardata);
3961                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3962                class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x3001, class_uchardata);
3963                class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x7fffffff, class_uchardata);
3964                }                }
3965  #endif  #endif
3966              continue;              continue;
# Line 3906  for (;; ptr++) Line 3971  for (;; ptr++)
3971              SETBIT(classbits, 0x0c); /* FF */              SETBIT(classbits, 0x0c); /* FF */
3972              SETBIT(classbits, 0x0d); /* CR */              SETBIT(classbits, 0x0d); /* CR */
3973              SETBIT(classbits, 0x85); /* NEL */              SETBIT(classbits, 0x85); /* NEL */
3974  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3975              if (utf8)              if (utf8)
3976                {                {
3977                class_utf8 = TRUE;                xclass = TRUE;
3978                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3979                class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2028, class_uchardata);
3980                class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2029, class_uchardata);
3981                }                }
3982  #endif  #endif
3983              continue;              continue;
# Line 3934  for (;; ptr++) Line 3999  for (;; ptr++)
3999                classbits[c] |= x;                classbits[c] |= x;
4000                }                }
4001    
4002  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4003              if (utf8)              if (utf8)
4004                {                {
4005                class_utf8 = TRUE;                xclass = TRUE;
4006                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
4007                class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x0100, class_uchardata);
4008                class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2027, class_uchardata);
4009                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
4010                class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2029, class_uchardata);
4011                class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x7fffffff, class_uchardata);
4012                }                }
4013  #endif  #endif
4014              continue;              continue;
# Line 3956  for (;; ptr++) Line 4021  for (;; ptr++)
4021                int pdata;                int pdata;
4022                int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);                int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
4023                if (ptype < 0) goto FAILED;                if (ptype < 0) goto FAILED;
4024                class_utf8 = TRUE;                xclass = TRUE;
4025                *class_utf8data++ = ((-c == ESC_p) != negated)?                *class_uchardata++ = ((-c == ESC_p) != negated)?
4026                  XCL_PROP : XCL_NOTPROP;                  XCL_PROP : XCL_NOTPROP;
4027                *class_utf8data++ = ptype;                *class_uchardata++ = ptype;
4028                *class_utf8data++ = pdata;                *class_uchardata++ = pdata;
4029                class_charcount -= 2;   /* Not a < 256 character */                class_charcount -= 2;   /* Not a < 256 character */
4030                continue;                continue;
4031                }                }
# Line 3982  for (;; ptr++) Line 4047  for (;; ptr++)
4047            }            }
4048    
4049          /* Fall through if we have a single character (c >= 0). This may be          /* Fall through if we have a single character (c >= 0). This may be
4050          greater than 256 in UTF-8 mode. */          greater than 256. */
4051    
4052          }   /* End of backslash handling */          }   /* End of backslash handling */
4053    
# Line 4080  for (;; ptr++) Line 4145  for (;; ptr++)
4145          matching for characters > 127 is available only if UCP support is          matching for characters > 127 is available only if UCP support is
4146          available. */          available. */
4147    
4148  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4149          if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))          if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
4150    #endif
4151    #ifndef COMPILE_PCRE8
4152            if (d > 255)
4153    #endif
4154    #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4155            {            {
4156            class_utf8 = TRUE;            xclass = TRUE;
4157    
4158            /* With UCP support, we can find the other case equivalents of            /* With UCP support, we can find the other case equivalents of
4159            the relevant characters. There may be several ranges. Optimize how            the relevant characters. There may be several ranges. Optimize how
# Line 4116  for (;; ptr++) Line 4186  for (;; ptr++)
4186    
4187                if (occ == ocd)                if (occ == ocd)
4188                  {                  {
4189                  *class_utf8data++ = XCL_SINGLE;                  *class_uchardata++ = XCL_SINGLE;
4190                  }                  }
4191                else                else
4192                  {                  {
4193                  *class_utf8data++ = XCL_RANGE;                  *class_uchardata++ = XCL_RANGE;
4194                  class_utf8data += _pcre_ord2utf8(occ, class_utf8data);                  class_uchardata += PRIV(ord2utf8)(occ, class_uchardata);
4195                  }                  }
4196                class_utf8data += _pcre_ord2utf8(ocd, class_utf8data);                class_uchardata += PRIV(ord2utf8)(ocd, class_uchardata);
4197                }                }
4198              }              }
4199  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 4131  for (;; ptr++) Line 4201  for (;; ptr++)
4201            /* Now record the original range, possibly modified for UCP caseless            /* Now record the original range, possibly modified for UCP caseless
4202            overlapping ranges. */            overlapping ranges. */
4203    
4204            *class_utf8data++ = XCL_RANGE;            *class_uchardata++ = XCL_RANGE;
4205            class_utf8data += _pcre_ord2utf8(c, class_utf8data);  #ifdef SUPPORT_UTF
4206            class_utf8data += _pcre_ord2utf8(d, class_utf8data);            class_uchardata += PRIV(ord2utf8)(c, class_uchardata);
4207              class_uchardata += PRIV(ord2utf8)(d, class_uchardata);
4208    #else
4209              *class_uchardata++ = c;
4210              *class_uchardata++ = d;
4211    #endif
4212    
4213            /* With UCP support, we are done. Without UCP support, there is no            /* With UCP support, we are done. Without UCP support, there is no
4214            caseless matching for UTF-8 characters > 127; we can use the bit map            caseless matching for UTF characters > 127; we can use the bit map
4215            for the smaller ones. */            for the smaller ones. As for 16 bit characters without UTF, we
4216              can still use the bit map for characters below 256. */
4217    
4218  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4219            continue;    /* With next character in the class */            continue;    /* With next character in the class */
4220  #else  #else
4221    #ifdef SUPPORT_UTF
4222            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
   
4223            /* Adjust upper limit and fall through to set up the map */            /* Adjust upper limit and fall through to set up the map */
   
4224            d = 127;            d = 127;
4225    #else
4226              if (c > 255) continue;
4227              /* Adjust upper limit and fall through to set up the map */
4228              d = 255;
4229    #endif  /* SUPPORT_UTF */
4230  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
4231            }            }
4232  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 || COMPILE_PCRE16 */
4233    
4234          /* We use the bit map for all cases when not in UTF-8 mode; else          /* We use the bit map for 8 bit mode, or when the characters fall
4235          ranges that lie entirely within 0-127 when there is UCP support; else          partially or entirely within the [0-255] ([0-127] for UCP) ranges. */
         for partial ranges without UCP support. */  
4236    
4237          class_charcount += d - c + 1;          class_charcount += d - c + 1;
4238          class_lastchar = d;          class_lastchar = d;
# Line 4182  for (;; ptr++) Line 4260  for (;; ptr++)
4260    
4261        /* Handle a character that cannot go in the bit map */        /* Handle a character that cannot go in the bit map */
4262    
4263  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4264        if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))        if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
4265    #endif
4266    #ifndef COMPILE_PCRE8
4267          if (c > 255)
4268    #endif
4269    #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4270          {          {
4271          class_utf8 = TRUE;          xclass = TRUE;
4272          *class_utf8data++ = XCL_SINGLE;          *class_uchardata++ = XCL_SINGLE;
4273          class_utf8data += _pcre_ord2utf8(c, class_utf8data);  #ifdef SUPPORT_UTF
4274            class_uchardata += PRIV(ord2utf8)(c, class_uchardata);
4275    #else
4276            *class_uchardata++ = c;
4277    #endif
4278    
4279  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4280          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
# Line 4195  for (;; ptr++) Line 4282  for (;; ptr++)
4282            unsigned int othercase;            unsigned int othercase;
4283            if ((othercase = UCD_OTHERCASE(c)) != c)            if ((othercase = UCD_OTHERCASE(c)) != c)
4284              {              {
4285              *class_utf8data++ = XCL_SINGLE;              *class_uchardata++ = XCL_SINGLE;
4286              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);              class_uchardata += PRIV(ord2utf8)(othercase, class_uchardata);
4287              }              }
4288            }            }
4289  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 4252  for (;; ptr++) Line 4339  for (;; ptr++)
4339      char if this item is first, whatever repeat count may follow. In the case      char if this item is first, whatever repeat count may follow. In the case
4340      of reqbyte, save the previous value for reinstating. */      of reqbyte, save the previous value for reinstating. */
4341    
4342  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4343      if (class_charcount == 1 && !class_utf8 &&      if (class_charcount == 1 && !xclass &&
4344        (!utf8 || !negate_class || class_lastchar < 128))        (!utf8 || !negate_class || class_lastchar < 128))
4345  #else  #elif defined COMPILE_PCRE8
4346      if (class_charcount == 1)      if (class_charcount == 1)
4347    #else
4348        if (class_charcount == 1 && !xclass)
4349  #endif  #endif
4350        {        {
4351        zeroreqbyte = reqbyte;        zeroreqbyte = reqbyte;
# Line 4277  for (;; ptr++) Line 4366  for (;; ptr++)
4366    
4367  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4368        if (utf8 && class_lastchar > 127)        if (utf8 && class_lastchar > 127)
4369          mclength = _pcre_ord2utf8(class_lastchar, mcbuffer);          mclength = PRIV(ord2utf8)(class_lastchar, mcbuffer);
4370        else        else
4371  #endif  #endif
4372          {          {
# Line 4304  for (;; ptr++) Line 4393  for (;; ptr++)
4393      be listed) there are no characters < 256, we can omit the bitmap in the      be listed) there are no characters < 256, we can omit the bitmap in the
4394      actual compiled code. */      actual compiled code. */
4395    
4396  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4397      if (class_utf8 && (!should_flip_negation || (options & PCRE_UCP) != 0))      if (xclass && (!should_flip_negation || (options & PCRE_UCP) != 0))
4398    #endif
4399    #ifndef COMPILE_PCRE8
4400        if (xclass && !should_flip_negation)
4401    #endif
4402    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4403        {        {
4404        *class_utf8data++ = XCL_END;    /* Marks the end of extra data */        *class_uchardata++ = XCL_END;    /* Marks the end of extra data */
4405        *code++ = OP_XCLASS;        *code++ = OP_XCLASS;
4406        code += LINK_SIZE;        code += LINK_SIZE;
4407        *code = negate_class? XCL_NOT : 0;        *code = negate_class? XCL_NOT:0;
4408    
4409        /* If the map is required, move up the extra data to make room for it;        /* If the map is required, move up the extra data to make room for it;
4410        otherwise just move the code pointer to the end of the extra data. */        otherwise just move the code pointer to the end of the extra data. */
# Line 4318  for (;; ptr++) Line 4412  for (;; ptr++)
4412        if (class_charcount > 0)        if (class_charcount > 0)
4413          {          {
4414          *code++ |= XCL_MAP;          *code++ |= XCL_MAP;
4415          memmove(code + 32, code, class_utf8data - code);          memmove(code + (32 / sizeof(pcre_uchar)), code,
4416              IN_UCHARS(class_uchardata - code));
4417          memcpy(code, classbits, 32);          memcpy(code, classbits, 32);
4418          code = class_utf8data + 32;          code = class_uchardata + (32 / sizeof(pcre_uchar));
4419          }          }
4420        else code = class_utf8data;        else code = class_uchardata;
4421    
4422        /* Now fill in the complete length of the item */        /* Now fill in the complete length of the item */
4423    
# Line 4338  for (;; ptr++) Line 4433  for (;; ptr++)
4433      negating it if necessary. */      negating it if necessary. */
4434    
4435      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
4436      if (negate_class)      if (lengthptr == NULL)    /* Save time in the pre-compile phase */
       {  
       if (lengthptr == NULL)    /* Save time in the pre-compile phase */  
         for (c = 0; c < 32; c++) code[c] = ~classbits[c];  
       }  
     else  
4437        {        {
4438          if (negate_class)
4439            for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
4440        memcpy(code, classbits, 32);        memcpy(code, classbits, 32);
4441        }        }
4442      code += 32;      code += 32 / sizeof(pcre_uchar);
4443      break;      break;
4444    
4445    
# Line 4427  for (;; ptr++) Line 4519  for (;; ptr++)
4519    
4520      if (*previous == OP_RECURSE)      if (*previous == OP_RECURSE)
4521        {        {
4522        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);        memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE));
4523        *previous = OP_ONCE;        *previous = OP_ONCE;
4524        PUT(previous, 1, 2 + 2*LINK_SIZE);        PUT(previous, 1, 2 + 2*LINK_SIZE);
4525        previous[2 + 2*LINK_SIZE] = OP_KET;        previous[2 + 2*LINK_SIZE] = OP_KET;
# Line 4466  for (;; ptr++) Line 4558  for (;; ptr++)
4558  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4559        if (utf8 && (code[-1] & 0x80) != 0)        if (utf8 && (code[-1] & 0x80) != 0)
4560          {          {
4561          uschar *lastchar = code - 1;          pcre_uchar *lastchar = code - 1;
4562          while((*lastchar & 0xc0) == 0x80) lastchar--;          while((*lastchar & 0xc0) == 0x80) lastchar--;
4563          c = code - lastchar;            /* Length of UTF-8 character */          c = code - lastchar;            /* Length of UTF-8 character */
4564          memcpy(utf8_char, lastchar, c); /* Save the char */          memcpy(utf8_char, lastchar, c); /* Save the char */
# Line 4528  for (;; ptr++) Line 4620  for (;; ptr++)
4620    
4621      else if (*previous < OP_EODN)      else if (*previous < OP_EODN)
4622        {        {
4623        uschar *oldcode;        pcre_uchar *oldcode;
4624        int prop_type, prop_value;        int prop_type, prop_value;
4625        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
4626        c = *previous;        c = *previous;
# Line 4701  for (;; ptr++) Line 4793  for (;; ptr++)
4793    
4794      else if (*previous == OP_CLASS ||      else if (*previous == OP_CLASS ||
4795               *previous == OP_NCLASS ||               *previous == OP_NCLASS ||
4796  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8
4797               *previous == OP_XCLASS ||               *previous == OP_XCLASS ||
4798  #endif  #endif
4799               *previous == OP_REF ||               *previous == OP_REF ||
# Line 4750  for (;; ptr++) Line 4842  for (;; ptr++)
4842        {        {
4843        register int i;        register int i;
4844        int len = (int)(code - previous);        int len = (int)(code - previous);
4845        uschar *bralink = NULL;        pcre_uchar *bralink = NULL;
4846        uschar *brazeroptr = NULL;        pcre_uchar *brazeroptr = NULL;
4847    
4848        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
4849        we just ignore the repeat. */        we just ignore the repeat. */
# Line 4805  for (;; ptr++) Line 4897  for (;; ptr++)
4897            {            {
4898            *code = OP_END;            *code = OP_END;
4899            adjust_recurse(previous, 1, utf8, cd, save_hwm);            adjust_recurse(previous, 1, utf8, cd, save_hwm);
4900            memmove(previous+1, previous, len);            memmove(previous + 1, previous, IN_UCHARS(len));
4901            code++;            code++;
4902            if (repeat_max == 0)            if (repeat_max == 0)
4903              {              {
# Line 4829  for (;; ptr++) Line 4921  for (;; ptr++)
4921            int offset;            int offset;
4922            *code = OP_END;            *code = OP_END;
4923            adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);            adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);
4924            memmove(previous + 2 + LINK_SIZE, previous, len);            memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
4925            code += 2 + LINK_SIZE;            code += 2 + LINK_SIZE;
4926            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
4927            *previous++ = OP_BRA;            *previous++ = OP_BRA;
# Line 4882  for (;; ptr++) Line 4974  for (;; ptr++)
4974              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
4975              for (i = 1; i < repeat_min; i++)              for (i = 1; i < repeat_min; i++)
4976                {                {
4977                uschar *hc;                pcre_uchar *hc;
4978                uschar *this_hwm = cd->hwm;                pcre_uchar *this_hwm = cd->hwm;
4979                memcpy(code, previous, len);                memcpy(code, previous, IN_UCHARS(len));
4980                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
4981                  {                  {
4982                  PUT(cd->hwm, 0, GET(hc, 0) + len);                  PUT(cd->hwm, 0, GET(hc, 0) + len);
# Line 4934  for (;; ptr++) Line 5026  for (;; ptr++)
5026    
5027          else for (i = repeat_max - 1; i >= 0; i--)          else for (i = repeat_max - 1; i >= 0; i--)
5028            {            {
5029            uschar *hc;            pcre_uchar *hc;
5030            uschar *this_hwm = cd->hwm;            pcre_uchar *this_hwm = cd->hwm;
5031    
5032            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
5033    
# Line 4951  for (;; ptr++) Line 5043  for (;; ptr++)
5043              PUTINC(code, 0, offset);              PUTINC(code, 0, offset);
5044              }              }
5045    
5046            memcpy(code, previous, len);            memcpy(code, previous, IN_UCHARS(len));
5047            for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)            for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
5048              {              {
5049              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
# Line 4968  for (;; ptr++) Line 5060  for (;; ptr++)
5060            {            {
5061            int oldlinkoffset;            int oldlinkoffset;
5062            int offset = (int)(code - bralink + 1);            int offset = (int)(code - bralink + 1);
5063            uschar *bra = code - offset;            pcre_uchar *bra = code - offset;
5064            oldlinkoffset = GET(bra, 1);            oldlinkoffset = GET(bra, 1);
5065            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
5066            *code++ = OP_KET;            *code++ = OP_KET;
# Line 4994  for (;; ptr++) Line 5086  for (;; ptr++)
5086        KETRPOS. (It turns out to be convenient at runtime to detect this kind of        KETRPOS. (It turns out to be convenient at runtime to detect this kind of
5087        subpattern at both the start and at the end.) The use of special opcodes        subpattern at both the start and at the end.) The use of special opcodes
5088        makes it possible to reduce greatly the stack usage in pcre_exec(). If        makes it possible to reduce greatly the stack usage in pcre_exec(). If
5089        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. Then        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
5090        cancel the possessive flag so that the default action below, of wrapping  
5091        everything inside atomic brackets, does not happen. */        Then, if the minimum number of matches is 1 or 0, cancel the possessive
5092          flag so that the default action below, of wrapping everything inside
5093          atomic brackets, does not happen. When the minimum is greater than 1,
5094          there will be earlier copies of the group, and so we still have to wrap
5095          the whole thing. */
5096    
5097        else        else
5098          {          {
5099          uschar *ketcode = code - 1 - LINK_SIZE;          pcre_uchar *ketcode = code - 1 - LINK_SIZE;
5100          uschar *bracode = ketcode - GET(ketcode, 1);          pcre_uchar *bracode = ketcode - GET(ketcode, 1);
5101    
5102          /* Convert possessive ONCE brackets to non-capturing */          /* Convert possessive ONCE brackets to non-capturing */
5103    
# Line 5023  for (;; ptr++) Line 5119  for (;; ptr++)
5119    
5120            if (lengthptr == NULL)            if (lengthptr == NULL)
5121              {              {
5122              uschar *scode = bracode;              pcre_uchar *scode = bracode;
5123              do              do
5124                {                {
5125                if (could_be_empty_branch(scode, ketcode, utf8, cd))                if (could_be_empty_branch(scode, ketcode, utf8, cd))
# Line 5050  for (;; ptr++) Line 5146  for (;; ptr++)
5146                int nlen = (int)(code - bracode);                int nlen = (int)(code - bracode);
5147                *code = OP_END;                *code = OP_END;
5148                adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);                adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5149                memmove(bracode + 1+LINK_SIZE, bracode, nlen);                memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
5150                code += 1 + LINK_SIZE;                code += 1 + LINK_SIZE;
5151                nlen += 1 + LINK_SIZE;                nlen += 1 + LINK_SIZE;
5152                *bracode = OP_BRAPOS;                *bracode = OP_BRAPOS;
# Line 5068  for (;; ptr++) Line 5164  for (;; ptr++)
5164                }                }
5165    
5166              /* If the minimum is zero, mark it as possessive, then unset the              /* If the minimum is zero, mark it as possessive, then unset the
5167              possessive flag. */              possessive flag when the minimum is 0 or 1. */
5168    
5169              if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;              if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
5170              possessive_quantifier = FALSE;              if (repeat_min < 2) possessive_quantifier = FALSE;
5171              }              }
5172    
5173            /* Non-possessive quantifier */            /* Non-possessive quantifier */
# Line 5103  for (;; ptr++) Line 5199  for (;; ptr++)
5199      notation is just syntactic sugar, taken from Sun's Java package, but the      notation is just syntactic sugar, taken from Sun's Java package, but the
5200      special opcodes can optimize it.      special opcodes can optimize it.
5201    
5202      Possessively repeated subpatterns have already been handled in the code      Some (but not all) possessively repeated subpatterns have already been
5203      just above, so possessive_quantifier is always FALSE for them at this      completely handled in the code just above. For them, possessive_quantifier
5204      stage.      is always FALSE at this stage.
5205    
5206      Note that the repeated item starts at tempcode, not at previous, which      Note that the repeated item starts at tempcode, not at previous, which
5207      might be the first part of a string whose (former) last char we repeated.      might be the first part of a string whose (former) last char we repeated.
# Line 5119  for (;; ptr++) Line 5215  for (;; ptr++)
5215        int len;        int len;
5216    
5217        if (*tempcode == OP_TYPEEXACT)        if (*tempcode == OP_TYPEEXACT)
5218          tempcode += _pcre_OP_lengths[*tempcode] +          tempcode += PRIV(OP_lengths)[*tempcode] +
5219            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);            ((tempcode[1 + IMM2_SIZE] == OP_PROP
5220              || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
5221    
5222        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
5223          {          {
5224          tempcode += _pcre_OP_lengths[*tempcode];          tempcode += PRIV(OP_lengths)[*tempcode];
5225  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5226          if (utf8 && tempcode[-1] >= 0xc0)          if (utf8 && tempcode[-1] >= 0xc0)
5227            tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];            tempcode += PRIV(utf8_table4)[tempcode[-1] & 0x3f];
5228  #endif  #endif
5229          }          }
5230    
# Line 5165  for (;; ptr++) Line 5262  for (;; ptr++)
5262          default:          default:
5263          *code = OP_END;          *code = OP_END;
5264          adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);          adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5265          memmove(tempcode + 1+LINK_SIZE, tempcode, len);          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
5266          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
5267          len += 1 + LINK_SIZE;          len += 1 + LINK_SIZE;
5268          tempcode[0] = OP_ONCE;          tempcode[0] = OP_ONCE;
# Line 5206  for (;; ptr++) Line 5303  for (;; ptr++)
5303        int i, namelen;        int i, namelen;
5304        int arglen = 0;        int arglen = 0;
5305        const char *vn = verbnames;        const char *vn = verbnames;
5306        const uschar *name = ptr + 1;        const pcre_uchar *name = ptr + 1;
5307        const uschar *arg = NULL;        const pcre_uchar *arg = NULL;
5308        previous = NULL;        previous = NULL;
5309        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
5310        namelen = (int)(ptr - name);        namelen = (int)(ptr - name);
# Line 5234  for (;; ptr++) Line 5331  for (;; ptr++)
5331        for (i = 0; i < verbcount; i++)        for (i = 0; i < verbcount; i++)
5332          {          {
5333          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
5334              strncmp((char *)name, vn, namelen) == 0)              STRNCMP_UC_C8(name, vn, namelen) == 0)
5335            {            {
5336            /* Check for open captures before ACCEPT and convert it to            /* Check for open captures before ACCEPT and convert it to
5337            ASSERT_ACCEPT if in an assertion. */            ASSERT_ACCEPT if in an assertion. */
# Line 5282  for (;; ptr++) Line 5379  for (;; ptr++)
5379              *code = verbs[i].op_arg;              *code = verbs[i].op_arg;
5380              if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;              if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;
5381              *code++ = arglen;              *code++ = arglen;
5382              memcpy(code, arg, arglen);              memcpy(code, arg, IN_UCHARS(arglen));
5383              code += arglen;              code += arglen;
5384              *code++ = 0;              *code++ = 0;
5385              }              }
# Line 5305  for (;; ptr++) Line 5402  for (;; ptr++)
5402        {        {
5403        int i, set, unset, namelen;        int i, set, unset, namelen;
5404        int *optset;        int *optset;
5405        const uschar *name;        const pcre_uchar *name;
5406        uschar *slot;        pcre_uchar *slot;
5407    
5408        switch (*(++ptr))        switch (*(++ptr))
5409          {          {
# Line 5359  for (;; ptr++) Line 5456  for (;; ptr++)
5456            break;            break;
5457    
5458          /* Most other conditions use OP_CREF (a couple change to OP_RREF          /* Most other conditions use OP_CREF (a couple change to OP_RREF
5459          below), and all need to skip 3 bytes at the start of the group. */          below), and all need to skip 1+IMM2_SIZE bytes at the start of the group. */
5460    
5461          code[1+LINK_SIZE] = OP_CREF;          code[1+LINK_SIZE] = OP_CREF;
5462          skipbytes = 3;          skipbytes = 1+IMM2_SIZE;
5463          refsign = -1;          refsign = -1;
5464    
5465          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
# Line 5458  for (;; ptr++) Line 5555  for (;; ptr++)
5555          slot = cd->name_table;          slot = cd->name_table;
5556          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
5557            {            {
5558            if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;            if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break;
5559            slot += cd->name_entry_size;            slot += cd->name_entry_size;
5560            }            }
5561    
# Line 5515  for (;; ptr++) Line 5612  for (;; ptr++)
5612          /* Similarly, check for the (?(DEFINE) "condition", which is always          /* Similarly, check for the (?(DEFINE) "condition", which is always
5613          false. */          false. */
5614    
5615          else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)          else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0)
5616            {            {
5617            code[1+LINK_SIZE] = OP_DEF;            code[1+LINK_SIZE] = OP_DEF;
5618            skipbytes = 1;            skipbytes = 1;
# Line 5595  for (;; ptr++) Line 5692  for (;; ptr++)
5692    
5693          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5694          case CHAR_C:                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
5695          previous_callout = code;  /* Save for later completion */          previous_callout = code;     /* Save for later completion */
5696          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1;    /* Skip one item before completing */
5697          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
5698            {            {
5699            int n = 0;            int n = 0;
# Line 5718  for (;; ptr++) Line 5815  for (;; ptr++)
5815                if (crc < 0)                if (crc < 0)
5816                  {                  {
5817                  memmove(slot + cd->name_entry_size, slot,                  memmove(slot + cd->name_entry_size, slot,
5818                    (cd->names_found - i) * cd->name_entry_size);                    IN_UCHARS((cd->names_found - i) * cd->name_entry_size));
5819                  break;                  break;
5820                  }                  }
5821    
# Line 5732  for (;; ptr++) Line 5829  for (;; ptr++)
5829    
5830              if (!dupname)              if (!dupname)
5831                {                {
5832                uschar *cslot = cd->name_table;                pcre_uchar *cslot = cd->name_table;
5833                for (i = 0; i < cd->names_found; i++)                for (i = 0; i < cd->names_found; i++)
5834                  {                  {
5835                  if (cslot != slot)                  if (cslot != slot)
# Line 5749  for (;; ptr++) Line 5846  for (;; ptr++)
5846                }                }
5847    
5848              PUT2(slot, 0, cd->bracount + 1);              PUT2(slot, 0, cd->bracount + 1);
5849              memcpy(slot + 2, name, namelen);              memcpy(slot + 2, name, IN_UCHARS(namelen));
5850              slot[2+namelen] = 0;              slot[2 + namelen] = 0;
5851              }              }
5852            }            }
5853    
# Line 5788  for (;; ptr++) Line 5885  for (;; ptr++)
5885    
5886          if (lengthptr != NULL)          if (lengthptr != NULL)
5887            {            {
5888            const uschar *temp;            const pcre_uchar *temp;
5889    
5890            if (namelen == 0)            if (namelen == 0)
5891              {              {
# Line 5833  for (;; ptr++) Line 5930  for (;; ptr++)
5930            slot = cd->name_table;            slot = cd->name_table;
5931            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
5932              {              {
5933              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&              if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&
5934                  slot[2+namelen] == 0)                  slot[2+namelen] == 0)
5935                break;                break;
5936              slot += cd->name_entry_size;              slot += cd->name_entry_size;
# Line 5870  for (;; ptr++) Line 5967  for (;; ptr++)
5967          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
5968          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
5969            {            {
5970            const uschar *called;            const pcre_uchar *called;
5971            terminator = CHAR_RIGHT_PARENTHESIS;            terminator = CHAR_RIGHT_PARENTHESIS;
5972    
5973            /* Come here from the \g<...> and \g'...' code (Oniguruma            /* Come here from the \g<...> and \g'...' code (Oniguruma
# Line 5949  for (;; ptr++) Line 6046  for (;; ptr++)
6046              {              {
6047              *code = OP_END;              *code = OP_END;
6048              if (recno != 0)              if (recno != 0)
6049                called = _pcre_find_bracket(cd->start_code, utf8, recno);                called = PRIV(find_bracket)(cd->start_code, utf8, recno);
6050    
6051              /* Forward reference */              /* Forward reference */
6052    
# Line 6103  for (;; ptr++) Line 6200  for (;; ptr++)
6200        NUMBERED_GROUP:        NUMBERED_GROUP:
6201        cd->bracount += 1;        cd->bracount += 1;
6202        PUT2(code, 1+LINK_SIZE, cd->bracount);        PUT2(code, 1+LINK_SIZE, cd->bracount);
6203        skipbytes = 2;        skipbytes = IMM2_SIZE;
6204        }        }
6205    
6206      /* Process nested bracketed regex. Assertions used not to be repeatable,      /* Process nested bracketed regex. Assertions used not to be repeatable,
# Line 6158  for (;; ptr++) Line 6255  for (;; ptr++)
6255    
6256      if (bravalue == OP_COND && lengthptr == NULL)      if (bravalue == OP_COND && lengthptr == NULL)
6257        {        {
6258        uschar *tc = code;        pcre_uchar *tc = code;
6259        int condcount = 0;        int condcount = 0;
6260    
6261        do {        do {
# Line 6333  for (;; ptr++) Line 6430  for (;; ptr++)
6430    
6431        if (-c == ESC_g)        if (-c == ESC_g)
6432          {          {
6433          const uschar *p;          const pcre_uchar *p;
6434          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
6435          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
6436            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
# Line 6473  for (;; ptr++) Line 6570  for (;; ptr++)
6570            }            }
6571          else          else
6572  #endif  #endif
6573            {          /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE
6574            so that it works in DFA mode and in lookbehinds. */
6575    
6576              {
6577            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
6578            *code++ = -c;            *code++ = (!utf8 && c == -ESC_C)? OP_ALLANY : -c;
6579            }            }
6580          }          }
6581        continue;        continue;
# Line 6487  for (;; ptr++) Line 6587  for (;; ptr++)
6587    
6588  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6589      if (utf8 && c > 127)      if (utf8 && c > 127)
6590        mclength = _pcre_ord2utf8(c, mcbuffer);        mclength = PRIV(ord2utf8)(c, mcbuffer);
6591      else      else
6592  #endif  #endif
6593    
# Line 6609  Returns:         TRUE on success Line 6709  Returns:         TRUE on success
6709  */  */
6710    
6711  static BOOL  static BOOL
6712  compile_regex(int options, uschar **codeptr, const uschar **ptrptr,  compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
6713    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
6714    int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
6715    compile_data *cd, int *lengthptr)    compile_data *cd, int *lengthptr)
6716  {  {
6717  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
6718  uschar *code = *codeptr;  pcre_uchar *code = *codeptr;
6719  uschar *last_branch = code;  pcre_uchar *last_branch = code;
6720  uschar *start_bracket = code;  pcre_uchar *start_bracket = code;
6721  uschar *reverse_count = NULL;  pcre_uchar *reverse_count = NULL;
6722  open_capitem capitem;  open_capitem capitem;
6723  int capnumber = 0;  int capnumber = 0;
6724  int firstbyte, reqbyte;  int firstbyte, reqbyte;
# Line 6813  for (;;) Line 6913  for (;;)
6913        if (cd->open_caps->flag)        if (cd->open_caps->flag)
6914          {          {
6915          memmove(start_bracket + 1 + LINK_SIZE, start_bracket,          memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
6916            code - start_bracket);            IN_UCHARS(code - start_bracket));
6917          *start_bracket = OP_ONCE;          *start_bracket = OP_ONCE;
6918          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
6919          PUT(start_bracket, 1, (int)(code - start_bracket));          PUT(start_bracket, 1, (int)(code - start_bracket));
# Line 6915  Returns:     TRUE or FALSE Line 7015  Returns:     TRUE or FALSE
7015  */  */
7016    
7017  static BOOL  static BOOL
7018  is_anchored(register const uschar *code, unsigned int bracket_map,  is_anchored(register const pcre_uchar *code, unsigned int bracket_map,
7019    unsigned int backref_map)    unsigned int backref_map)
7020  {  {
7021  do {  do {
7022     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
7023       FALSE);       code + PRIV(OP_lengths)[*code], FALSE);
7024     register int op = *scode;     register int op = *scode;
7025    
7026     /* Non-capturing brackets */     /* Non-capturing brackets */
# Line 6992  Returns:         TRUE or FALSE Line 7092  Returns:         TRUE or FALSE
7092  */  */
7093    
7094  static BOOL  static BOOL
7095  is_startline(const uschar *code, unsigned int bracket_map,  is_startline(const pcre_uchar *code, unsigned int bracket_map,
7096    unsigned int backref_map)    unsigned int backref_map)
7097  {  {
7098  do {  do {
7099     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
7100       FALSE);       code + PRIV(OP_lengths)[*code], FALSE);
7101     register int op = *scode;     register int op = *scode;
7102    
7103     /* If we are at the start of a conditional assertion group, *both* the     /* If we are at the start of a conditional assertion group, *both* the
# Line 7008  do { Line 7108  do {
7108     if (op == OP_COND)     if (op == OP_COND)
7109       {       {
7110       scode += 1 + LINK_SIZE;       scode += 1 + LINK_SIZE;
7111       if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];       if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
7112       switch (*scode)       switch (*scode)
7113         {         {
7114         case OP_CREF:         case OP_CREF:
# Line 7095  Returns:     -1 or the fixed first char Line 7195  Returns:     -1 or the fixed first char
7195  */  */
7196    
7197  static int  static int
7198  find_firstassertedchar(const uschar *code, BOOL inassert)  find_firstassertedchar(const pcre_uchar *code, BOOL inassert)
7199  {  {
7200  register int c = -1;  register int c = -1;
7201  do {  do {
7202     int d;     int d;
7203     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
7204               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0;               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
7205     const uschar *scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);     const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
7206         TRUE);
7207     register int op = *scode;     register int op = *scode;
7208    
7209     switch(op)     switch(op)
# Line 7126  do { Line 7227  do {
7227       break;       break;
7228    
7229       case OP_EXACT:       case OP_EXACT:
7230       scode += 2;       scode += IMM2_SIZE;
7231       /* Fall through */       /* Fall through */
7232    
7233       case OP_CHAR:       case OP_CHAR:
# Line 7139  do { Line 7240  do {
7240       break;       break;
7241    
7242       case OP_EXACTI:       case OP_EXACTI:
7243       scode += 2;       scode += IMM2_SIZE;
7244       /* Fall through */       /* Fall through */
7245    
7246       case OP_CHARI:       case OP_CHARI:
# Line 7182  Returns:        pointer to compiled data Line 7283  Returns:        pointer to compiled data
7283                  with errorptr and erroroffset set                  with errorptr and erroroffset set
7284  */  */
7285    
7286    #ifdef COMPILE_PCRE8
7287  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7288  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
7289    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
7290    #else
7291    PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7292    pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
7293      int *erroroffset, const unsigned char *tables)
7294    #endif
7295  {  {
7296    #ifdef COMPILE_PCRE8
7297  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7298    #else
7299    return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7300    #endif
7301  }  }
7302    
7303    
7304    #ifdef COMPILE_PCRE8
7305  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7306  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
7307    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
7308    #else
7309    PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7310    pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
7311      const char **errorptr, int *erroroffset, const unsigned char *tables)
7312    #endif
7313  {  {
7314  real_pcre *re;  real_pcre *re;
7315  int length = 1;  /* For final END opcode */  int length = 1;  /* For final END opcode */
# Line 7201  int errorcode = 0; Line 7318  int errorcode = 0;
7318  int skipatstart = 0;  int skipatstart = 0;
7319  BOOL utf8;  BOOL utf8;
7320  size_t size;  size_t size;
7321  uschar *code;  pcre_uchar *code;
7322  const uschar *codestart;  const pcre_uchar *codestart;
7323  const uschar *ptr;  const pcre_uchar *ptr;
7324  compile_data compile_block;  compile_data compile_block;
7325  compile_data *cd = &compile_block;  compile_data *cd = &compile_block;
7326    
# Line 7213  as soon as possible, so that a fairly la Line 7330  as soon as possible, so that a fairly la
7330  this purpose. The same space is used in the second phase for remembering where  this purpose. The same space is used in the second phase for remembering where
7331  to fill in forward references to subpatterns. */  to fill in forward references to subpatterns. */
7332    
7333  uschar cworkspace[COMPILE_WORK_SIZE];  pcre_uchar cworkspace[COMPILE_WORK_SIZE];
7334    
7335  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
7336    
7337  ptr = (const uschar *)pattern;  ptr = (const pcre_uchar *)pattern;
7338    
7339  /* We can't pass back an error message if errorptr is NULL; I guess the best we  /* We can't pass back an error message if errorptr is NULL; I guess the best we
7340  can do is just return NULL, but we can set a code value if there is a code  can do is just return NULL, but we can set a code value if there is a code
# Line 7244  if (erroroffset == NULL) Line 7361  if (erroroffset == NULL)
7361    
7362  /* Set up pointers to the individual character tables */  /* Set up pointers to the individual character tables */
7363    
7364  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = PRIV(default_tables);
7365  cd->lcc = tables + lcc_offset;  cd->lcc = tables + lcc_offset;
7366  cd->fcc = tables + fcc_offset;  cd->fcc = tables + fcc_offset;
7367  cd->cbits = tables + cbits_offset;  cd->cbits = tables + cbits_offset;
# Line 7267  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7384  while (ptr[skipatstart] == CHAR_LEFT_PAR
7384    int newnl = 0;    int newnl = 0;
7385    int newbsr = 0;    int newbsr = 0;
7386    
7387    if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)
7388      { skipatstart += 7; options |= PCRE_UTF8; continue; }      { skipatstart += 7; options |= PCRE_UTF8; continue; }
7389    else if (strncmp((char *)(ptr+skipatstart+2), STRING_UCP_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
7390      { skipatstart += 6; options |= PCRE_UCP; continue; }      { skipatstart += 6; options |= PCRE_UCP; continue; }
7391    else if (strncmp((char *)(ptr+skipatstart+2), STRING_NO_START_OPT_RIGHTPAR, 13) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
7392      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
7393    
7394    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
7395      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
7396    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)
7397      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
7398    else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5)  == 0)
7399      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
7400    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0)
7401      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
7402    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0)
7403      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
7404    
7405    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
7406      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
7407    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
7408      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
7409    
7410    if (newnl != 0)    if (newnl != 0)
# Line 7300  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7417  while (ptr[skipatstart] == CHAR_LEFT_PAR
7417  utf8 = (options & PCRE_UTF8) != 0;  utf8 = (options & PCRE_UTF8) != 0;
7418    
7419  /* Can't support UTF8 unless PCRE has been compiled to include the code. The  /* Can't support UTF8 unless PCRE has been compiled to include the code. The
7420  return of an error code from _pcre_valid_utf8() is a new feature, introduced in  return of an error code from PRIV(valid_utf8)() is a new feature, introduced in
7421  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
7422  not used here. */  not used here. */
7423    
7424  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
7425  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7426       (errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0)       (errorcode = PRIV(valid_utf8)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
7427    {    {
7428    errorcode = ERR44;    errorcode = ERR44;
7429    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
# Line 7388  cd->backref_map = 0; Line 7505  cd->backref_map = 0;
7505  /* Reflect pattern for debugging output */  /* Reflect pattern for debugging output */
7506    
7507  DPRINTF(("------------------------------------------------------------------\n"));  DPRINTF(("------------------------------------------------------------------\n"));
7508  DPRINTF(("%s\n", pattern));  #ifdef PCRE_DEBUG
7509    print_puchar(stdout, (PCRE_PUCHAR)pattern);
7510    #endif
7511    DPRINTF(("\n"));
7512    
7513  /* Pretend to compile the pattern while actually just accumulating the length  /* Pretend to compile the pattern while actually just accumulating the length
7514  of memory required. This behaviour is triggered by passing a non-NULL final  of memory required. This behaviour is triggered by passing a non-NULL final
# Line 7404  cd->name_table = NULL; Line 7524  cd->name_table = NULL;
7524  cd->start_workspace = cworkspace;  cd->start_workspace = cworkspace;
7525  cd->start_code = cworkspace;  cd->start_code = cworkspace;
7526  cd->hwm = cworkspace;  cd->hwm = cworkspace;
7527  cd->start_pattern = (const uschar *)pattern;  cd->start_pattern = (const pcre_uchar *)pattern;
7528  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
7529  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7530  cd->external_options = options;  cd->external_options = options;
7531  cd->external_flags = 0;  cd->external_flags = 0;
# Line 7425  code = cworkspace; Line 7545  code = cworkspace;
7545  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
7546    
7547  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
7548    cd->hwm - cworkspace));    (int)(cd->hwm - cworkspace)));
7549    
7550  if (length > MAX_PATTERN_SIZE)  if (length > MAX_PATTERN_SIZE)
7551    {    {
# Line 7438  externally provided function. Integer ov Line 7558  externally provided function. Integer ov
7558  because nowadays we limit the maximum value of cd->names_found and  because nowadays we limit the maximum value of cd->names_found and
7559  cd->name_entry_size. */  cd->name_entry_size. */
7560    
7561  size = length + sizeof(real_pcre) + cd->names_found * (cd->name_entry_size + 3);  size = sizeof(real_pcre) + (length + cd->names_found * (cd->name_entry_size + 3)) * sizeof(pcre_uchar);
7562  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
7563    
7564  if (re == NULL)  if (re == NULL)
# Line 7460  re->flags = cd->external_flags; Line 7580  re->flags = cd->external_flags;
7580  re->dummy1 = 0;  re->dummy1 = 0;
7581  re->first_byte = 0;  re->first_byte = 0;
7582  re->req_byte = 0;  re->req_byte = 0;
7583  re->name_table_offset = sizeof(real_pcre);  re->name_table_offset = sizeof(real_pcre) / sizeof(pcre_uchar);
7584  re->name_entry_size = cd->name_entry_size;  re->name_entry_size = cd->name_entry_size;
7585  re->name_count = cd->names_found;  re->name_count = cd->names_found;
7586  re->ref_count = 0;  re->ref_count = 0;
7587  re->tables = (tables == _pcre_default_tables)? NULL : tables;  re->tables = (tables == PRIV(default_tables))? NULL : tables;
7588  re->nullpad = NULL;  re->nullpad = NULL;
7589    
7590  /* The starting points of the name/number translation table and of the code are  /* The starting points of the name/number translation table and of the code are
# Line 7478  cd->final_bracount = cd->bracount;  /* S Line 7598  cd->final_bracount = cd->bracount;  /* S
7598  cd->assert_depth = 0;  cd->assert_depth = 0;
7599  cd->bracount = 0;  cd->bracount = 0;
7600  cd->names_found = 0;  cd->names_found = 0;
7601  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
7602  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
7603  cd->start_code = codestart;  cd->start_code = codestart;
7604  cd->hwm = cworkspace;  cd->hwm = cworkspace;
# Line 7491  cd->open_caps = NULL; Line 7611  cd->open_caps = NULL;
7611  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
7612  of the function here. */  of the function here. */
7613    
7614  ptr = (const uschar *)pattern + skipatstart;  ptr = (const pcre_uchar *)pattern + skipatstart;
7615  code = (uschar *)codestart;  code = (pcre_uchar *)codestart;
7616  *code = OP_BRA;  *code = OP_BRA;
7617  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
7618    &firstbyte, &reqbyte, NULL, cd, NULL);    &firstbyte, &reqbyte, NULL, cd, NULL);
# Line 7520  if (code - codestart > length) errorcode Line 7640  if (code - codestart > length) errorcode
7640  while (errorcode == 0 && cd->hwm > cworkspace)  while (errorcode == 0 && cd->hwm > cworkspace)
7641    {    {
7642    int offset, recno;    int offset, recno;
7643    const uschar *groupptr;    const pcre_uchar *groupptr;
7644    cd->hwm -= LINK_SIZE;    cd->hwm -= LINK_SIZE;
7645    offset = GET(cd->hwm, 0);    offset = GET(cd->hwm, 0);
7646    recno = GET(codestart, offset);    recno = GET(codestart, offset);
7647    groupptr = _pcre_find_bracket(codestart, utf8, recno);    groupptr = PRIV(find_bracket)(codestart, utf8, recno);
7648    if (groupptr == NULL) errorcode = ERR53;    if (groupptr == NULL) errorcode = ERR53;
7649      else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));      else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));
7650    }    }
7651    
7652  /* Give an error if there's back reference to a non-existent capturing  /* Give an error if there's back reference to a non-existent capturing
# Line 7544  length, and set their lengths. */ Line 7664  length, and set their lengths. */
7664    
7665  if (cd->check_lookbehind)  if (cd->check_lookbehind)
7666    {    {
7667    uschar *cc = (uschar *)codestart;    pcre_uchar *cc = (pcre_uchar *)codestart;
7668    
7669    /* Loop, searching for OP_REVERSE items, and process those that do not have    /* Loop, searching for OP_REVERSE items, and process those that do not have
7670    their length set. (Actually, it will also re-process any that have a length    their length set. (Actually, it will also re-process any that have a length
7671    of zero, but that is a pathological case, and it does no harm.) When we find    of zero, but that is a pathological case, and it does no harm.) When we find
7672    one, we temporarily terminate the branch it is in while we scan it. */    one, we temporarily terminate the branch it is in while we scan it. */
7673    
7674    for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);    for (cc = (pcre_uchar *)PRIV(find_bracket)(codestart, utf8, -1);
7675         cc != NULL;         cc != NULL;
7676         cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))         cc = (pcre_uchar *)PRIV(find_bracket)(cc, utf8, -1))
7677      {      {
7678      if (GET(cc, 1) == 0)      if (GET(cc, 1) == 0)
7679        {        {
7680        int fixed_length;        int fixed_length;
7681        uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);        pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
7682        int end_op = *be;        int end_op = *be;
7683        *be = OP_END;        *be = OP_END;
7684        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
# Line 7583  if (errorcode != 0) Line 7703  if (errorcode != 0)
7703    {    {
7704    (pcre_free)(re);    (pcre_free)(re);
7705    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
7706    *erroroffset = (int)(ptr - (const uschar *)pattern);    *erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
7707    PCRE_EARLY_ERROR_RETURN2:    PCRE_EARLY_ERROR_RETURN2:
7708    *errorptr = find_error_text(errorcode);    *errorptr = find_error_text(errorcode);
7709    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
# Line 7669  if (code - codestart > length) Line 7789  if (code - codestart > length)
7789    {    {
7790    (pcre_free)(re);    (pcre_free)(re);
7791    *errorptr = find_error_text(ERR23);    *errorptr = find_error_text(ERR23);
7792    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (pcre_uchar *)pattern;
7793    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
7794    return NULL;    return NULL;
7795    }    }

Legend:
Removed from v.749  
changed lines
  Added in v.770

  ViewVC Help
Powered by ViewVC 1.1.5