/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

code/trunk/pcre_compile.c revision 654 by ph10, Tue Aug 2 11:00:40 2011 UTC | code/branches/pcre16/pcre_compile.c revision 767 by zherczeg, Sat Nov 26 12:48:56 2011 UTC
# Line 231  static const char posix_names[] = Line 231  static const char posix_names[] =
231    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
232    STRING_word0  STRING_xdigit;    STRING_word0  STRING_xdigit;
233    
234  static const uschar posix_name_lengths[] = {  static const pcre_uint8 posix_name_lengths[] = {
235    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
236    
237  /* Table of class bit maps for each POSIX class. Each class is formed from a  /* Table of class bit maps for each POSIX class. Each class is formed from a
# Line 266  substitutes must be in the order of the Line 266  substitutes must be in the order of the
266  both positive and negative cases. NULL means no substitute. */  both positive and negative cases. NULL means no substitute. */
267    
268  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
269  static const uschar *substitutes[] = {  static const pcre_uchar string_PNd[]  = {
270    (uschar *)"\\P{Nd}",    /* \D */    CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
271    (uschar *)"\\p{Nd}",    /* \d */    CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
272    (uschar *)"\\P{Xsp}",   /* \S */       /* NOTE: Xsp is Perl space */  static const pcre_uchar string_pNd[]  = {
273    (uschar *)"\\p{Xsp}",   /* \s */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
274    (uschar *)"\\P{Xwd}",   /* \W */    CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
275    (uschar *)"\\p{Xwd}"    /* \w */  static const pcre_uchar string_PXsp[] = {
276      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
277      CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
278    static const pcre_uchar string_pXsp[] = {
279      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
280      CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
281    static const pcre_uchar string_PXwd[] = {
282      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
283      CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
284    static const pcre_uchar string_pXwd[] = {
285      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
286      CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
287    
288    static const pcre_uchar *substitutes[] = {
289      string_PNd,           /* \D */
290      string_pNd,           /* \d */
291      string_PXsp,          /* \S */       /* NOTE: Xsp is Perl space */
292      string_pXsp,          /* \s */
293      string_PXwd,          /* \W */
294      string_pXwd           /* \w */
295  };  };
296    
297  static const uschar *posix_substitutes[] = {  static const pcre_uchar string_pL[] =   {
298    (uschar *)"\\p{L}",     /* alpha */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
299    (uschar *)"\\p{Ll}",    /* lower */    CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
300    (uschar *)"\\p{Lu}",    /* upper */  static const pcre_uchar string_pLl[] =  {
301    (uschar *)"\\p{Xan}",   /* alnum */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
302    NULL,                   /* ascii */    CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
303    (uschar *)"\\h",        /* blank */  static const pcre_uchar string_pLu[] =  {
304    NULL,                   /* cntrl */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
305    (uschar *)"\\p{Nd}",    /* digit */    CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
306    NULL,                   /* graph */  static const pcre_uchar string_pXan[] = {
307    NULL,                   /* print */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
308    NULL,                   /* punct */    CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
309    (uschar *)"\\p{Xps}",   /* space */    /* NOTE: Xps is POSIX space */  static const pcre_uchar string_h[] =    {
310    (uschar *)"\\p{Xwd}",   /* word */    CHAR_BACKSLASH, CHAR_h, '\0' };
311    NULL,                   /* xdigit */  static const pcre_uchar string_pXps[] = {
312      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
313      CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
314    static const pcre_uchar string_PL[] =   {
315      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
316      CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
317    static const pcre_uchar string_PLl[] =  {
318      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
319      CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
320    static const pcre_uchar string_PLu[] =  {
321      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
322      CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
323    static const pcre_uchar string_PXan[] = {
324      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
325      CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
326    static const pcre_uchar string_H[] =    {
327      CHAR_BACKSLASH, CHAR_H, '\0' };
328    static const pcre_uchar string_PXps[] = {
329      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
330      CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
331    
332    static const pcre_uchar *posix_substitutes[] = {
333      string_pL,            /* alpha */
334      string_pLl,           /* lower */
335      string_pLu,           /* upper */
336      string_pXan,          /* alnum */
337      NULL,                 /* ascii */
338      string_h,             /* blank */
339      NULL,                 /* cntrl */
340      string_pNd,           /* digit */
341      NULL,                 /* graph */
342      NULL,                 /* print */
343      NULL,                 /* punct */
344      string_pXps,          /* space */    /* NOTE: Xps is POSIX space */
345      string_pXwd,          /* word */
346      NULL,                 /* xdigit */
347    /* Negated cases */    /* Negated cases */
348    (uschar *)"\\P{L}",     /* ^alpha */    string_PL,            /* ^alpha */
349    (uschar *)"\\P{Ll}",    /* ^lower */    string_PLl,           /* ^lower */
350    (uschar *)"\\P{Lu}",    /* ^upper */    string_PLu,           /* ^upper */
351    (uschar *)"\\P{Xan}",   /* ^alnum */    string_PXan,          /* ^alnum */
352    NULL,                   /* ^ascii */    NULL,                 /* ^ascii */
353    (uschar *)"\\H",        /* ^blank */    string_H,             /* ^blank */
354    NULL,                   /* ^cntrl */    NULL,                 /* ^cntrl */
355    (uschar *)"\\P{Nd}",    /* ^digit */    string_PNd,           /* ^digit */
356    NULL,                   /* ^graph */    NULL,                 /* ^graph */
357    NULL,                   /* ^print */    NULL,                 /* ^print */
358    NULL,                   /* ^punct */    NULL,                 /* ^punct */
359    (uschar *)"\\P{Xps}",   /* ^space */   /* NOTE: Xps is POSIX space */    string_PXps,          /* ^space */   /* NOTE: Xps is POSIX space */
360    (uschar *)"\\P{Xwd}",   /* ^word */    string_PXwd,          /* ^word */
361    NULL                    /* ^xdigit */    NULL                  /* ^xdigit */
362  };  };
363  #define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))  #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
364  #endif  #endif
365    
366  #define STRING(a)  # a  #define STRING(a)  # a
# Line 410  static const char error_texts[] = Line 464  static const char error_texts[] =
464    "this version of PCRE is not compiled with PCRE_UCP support\0"    "this version of PCRE is not compiled with PCRE_UCP support\0"
465    "\\c must be followed by an ASCII character\0"    "\\c must be followed by an ASCII character\0"
466    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
467      /* 70 */
468      "internal error: unknown opcode in find_fixedlength()\0"
469    ;    ;
470    
471  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 546  static const unsigned char ebcdic_charta Line 602  static const unsigned char ebcdic_charta
602  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
603    
604  static BOOL  static BOOL
605    compile_regex(int, uschar **, const uschar **, int *, BOOL, BOOL, int, int,    compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
606      int *, int *, branch_chain *, compile_data *, int *);      int *, int *, branch_chain *, compile_data *, int *);
607    
608    
# Line 593  Returns:    TRUE or FALSE Line 649  Returns:    TRUE or FALSE
649  */  */
650    
651  static BOOL  static BOOL
652  is_counted_repeat(const uschar *p)  is_counted_repeat(const pcre_uchar *p)
653  {  {
654  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
655  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
# Line 635  Returns:         zero or positive => a d Line 691  Returns:         zero or positive => a d
691  */  */
692    
693  static int  static int
694  check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,  check_escape(const pcre_uchar **ptrptr, int *errorcodeptr, int bracount,
695    int options, BOOL isclass)    int options, BOOL isclass)
696  {  {
697  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
698  const uschar *ptr = *ptrptr + 1;  const pcre_uchar *ptr = *ptrptr + 1;
699  int c, i;  int c, i;
700    
701  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */
# Line 666  else if ((i = escapes[c - 0x48]) != 0) Line 722  else if ((i = escapes[c - 0x48]) != 0)
722    
723  else  else
724    {    {
725    const uschar *oldptr;    const pcre_uchar *oldptr;
726    BOOL braced, negated;    BOOL braced, negated;
727    
728    switch (c)    switch (c)
# Line 676  else Line 732  else
732    
733      case CHAR_l:      case CHAR_l:
734      case CHAR_L:      case CHAR_L:
735        *errorcodeptr = ERR37;
736        break;
737    
738      case CHAR_u:      case CHAR_u:
739        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
740          {
741          /* In JavaScript, \u must be followed by four hexadecimal numbers.
742          Otherwise it is a lowercase u letter. */
743          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
744               && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
745            {
746            c = 0;
747            for (i = 0; i < 4; ++i)
748              {
749              register int cc = *(++ptr);
750    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
751              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
752              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
753    #else           /* EBCDIC coding */
754              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
755              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
756    #endif
757              }
758            }
759          }
760        else
761          *errorcodeptr = ERR37;
762        break;
763    
764      case CHAR_U:      case CHAR_U:
765      *errorcodeptr = ERR37;      /* In JavaScript, \U is an uppercase U letter. */
766        if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
767      break;      break;
768    
769      /* In a character class, \g is just a literal "g". Outside a character      /* In a character class, \g is just a literal "g". Outside a character
# Line 710  else Line 795  else
795    
796      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
797        {        {
798        const uschar *p;        const pcre_uchar *p;
799        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
800          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
801        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
# Line 828  else Line 913  else
913      treated as a data character. */      treated as a data character. */
914    
915      case CHAR_x:      case CHAR_x:
916        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
917          {
918          /* In JavaScript, \x must be followed by two hexadecimal numbers.
919          Otherwise it is a lowercase x letter. */
920          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
921            {
922            c = 0;
923            for (i = 0; i < 2; ++i)
924              {
925              register int cc = *(++ptr);
926    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
927              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
928              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
929    #else           /* EBCDIC coding */
930              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
931              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
932    #endif
933              }
934            }
935          break;
936          }
937    
938      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
939        {        {
940        const uschar *pt = ptr + 2;        const pcre_uchar *pt = ptr + 2;
941        int count = 0;        int count = 0;
942    
943        c = 0;        c = 0;
# Line 961  Returns:         type value from ucp_typ Line 1068  Returns:         type value from ucp_typ
1068  */  */
1069    
1070  static int  static int
1071  get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
1072  {  {
1073  int c, i, bot, top;  int c, i, bot, top;
1074  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
1075  char name[32];  pcre_uchar name[32];
1076    
1077  c = *(++ptr);  c = *(++ptr);
1078  if (c == 0) goto ERROR_RETURN;  if (c == 0) goto ERROR_RETURN;
# Line 1006  else Line 1113  else
1113  /* Search for a recognized property name using binary chop */  /* Search for a recognized property name using binary chop */
1114    
1115  bot = 0;  bot = 0;
1116  top = _pcre_utt_size;  top = PRIV(utt_size);
1117    
1118  while (bot < top)  while (bot < top)
1119    {    {
1120    i = (bot + top) >> 1;    i = (bot + top) >> 1;
1121    c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);    c = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
1122    if (c == 0)    if (c == 0)
1123      {      {
1124      *dptr = _pcre_utt[i].value;      *dptr = PRIV(utt)[i].value;
1125      return _pcre_utt[i].type;      return PRIV(utt)[i].type;
1126      }      }
1127    if (c > 0) bot = i + 1; else top = i;    if (c > 0) bot = i + 1; else top = i;
1128    }    }
# Line 1053  Returns:         pointer to '}' on succe Line 1160  Returns:         pointer to '}' on succe
1160                   current ptr on error, with errorcodeptr set non-zero                   current ptr on error, with errorcodeptr set non-zero
1161  */  */
1162    
1163  static const uschar *  static const pcre_uchar *
1164  read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr)  read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr)
1165  {  {
1166  int min = 0;  int min = 0;
1167  int max = -1;  int max = -1;
# Line 1139  Returns:       the number of the named s Line 1246  Returns:       the number of the named s
1246  */  */
1247    
1248  static int  static int
1249  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn,
1250    BOOL xmode, BOOL utf8, int *count)    BOOL xmode, BOOL utf8, int *count)
1251  {  {
1252  uschar *ptr = *ptrptr;  pcre_uchar *ptr = *ptrptr;
1253  int start_count = *count;  int start_count = *count;
1254  int hwm_count = start_count;  int hwm_count = start_count;
1255  BOOL dup_parens = FALSE;  BOOL dup_parens = FALSE;
# Line 1209  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1316  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1316          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1317        {        {
1318        int term;        int term;
1319        const uschar *thisname;        const pcre_uchar *thisname;
1320        *count += 1;        *count += 1;
1321        if (name == NULL && *count == lorn) return *count;        if (name == NULL && *count == lorn) return *count;
1322        term = *ptr++;        term = *ptr++;
# Line 1217  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1324  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1324        thisname = ptr;        thisname = ptr;
1325        while (*ptr != term) ptr++;        while (*ptr != term) ptr++;
1326        if (name != NULL && lorn == ptr - thisname &&        if (name != NULL && lorn == ptr - thisname &&
1327            strncmp((const char *)name, (const char *)thisname, lorn) == 0)            STRNCMP_UC_UC(name, thisname, lorn) == 0)
1328          return *count;          return *count;
1329        term++;        term++;
1330        }        }
# Line 1260  for (; ptr < cd->end_pattern; ptr++) Line 1367  for (; ptr < cd->end_pattern; ptr++)
1367          {          {
1368          if (ptr[2] == CHAR_E)          if (ptr[2] == CHAR_E)
1369            ptr+= 2;            ptr+= 2;
1370          else if (strncmp((const char *)ptr+2,          else if (STRNCMP_UC_C8(ptr + 2,
1371                   STR_Q STR_BACKSLASH STR_E, 3) == 0)                   STR_Q STR_BACKSLASH STR_E, 3) == 0)
1372            ptr += 4;            ptr += 4;
1373          else          else
# Line 1372  Returns:       the number of the found s Line 1479  Returns:       the number of the found s
1479  */  */
1480    
1481  static int  static int
1482  find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode,  find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode,
1483    BOOL utf8)    BOOL utf8)
1484  {  {
1485  uschar *ptr = (uschar *)cd->start_pattern;  pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern;
1486  int count = 0;  int count = 0;
1487  int rc;  int rc;
1488    
# Line 1413  Arguments: Line 1520  Arguments:
1520  Returns:       pointer to the first significant opcode  Returns:       pointer to the first significant opcode
1521  */  */
1522    
1523  static const uschar*  static const pcre_uchar*
1524  first_significant_code(const uschar *code, BOOL skipassert)  first_significant_code(const pcre_uchar *code, BOOL skipassert)
1525  {  {
1526  for (;;)  for (;;)
1527    {    {
# Line 1425  for (;;) Line 1532  for (;;)
1532      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1533      if (!skipassert) return code;      if (!skipassert) return code;
1534      do code += GET(code, 1); while (*code == OP_ALT);      do code += GET(code, 1); while (*code == OP_ALT);
1535      code += _pcre_OP_lengths[*code];      code += PRIV(OP_lengths)[*code];
1536      break;      break;
1537    
1538      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
# Line 1439  for (;;) Line 1546  for (;;)
1546      case OP_RREF:      case OP_RREF:
1547      case OP_NRREF:      case OP_NRREF:
1548      case OP_DEF:      case OP_DEF:
1549      code += _pcre_OP_lengths[*code];      code += PRIV(OP_lengths)[*code];
1550      break;      break;
1551    
1552      default:      default:
# Line 1475  Arguments: Line 1582  Arguments:
1582    
1583  Returns:   the fixed length,  Returns:   the fixed length,
1584               or -1 if there is no fixed length,               or -1 if there is no fixed length,
1585               or -2 if \C was encountered               or -2 if \C was encountered (in UTF-8 mode only)
1586               or -3 if an OP_RECURSE item was encountered and atend is FALSE               or -3 if an OP_RECURSE item was encountered and atend is FALSE
1587                 or -4 if an unknown opcode was encountered (internal error)
1588  */  */
1589    
1590  static int  static int
1591  find_fixedlength(uschar *code, BOOL utf8, BOOL atend, compile_data *cd)  find_fixedlength(pcre_uchar *code, BOOL utf8, BOOL atend, compile_data *cd)
1592  {  {
1593  int length = -1;  int length = -1;
1594    
1595  register int branchlength = 0;  register int branchlength = 0;
1596  register uschar *cc = code + 1 + LINK_SIZE;  register pcre_uchar *cc = code + 1 + LINK_SIZE;
1597    
1598  /* Scan along the opcodes for this branch. If we get to the end of the  /* Scan along the opcodes for this branch. If we get to the end of the
1599  branch, check the length against that of the other branches. */  branch, check the length against that of the other branches. */
# Line 1493  branch, check the length against that of Line 1601  branch, check the length against that of
1601  for (;;)  for (;;)
1602    {    {
1603    int d;    int d;
1604    uschar *ce, *cs;    pcre_uchar *ce, *cs;
1605    register int op = *cc;    register int op = *cc;
1606    switch (op)    switch (op)
1607      {      {
1608      /* We only need to continue for OP_CBRA (normal capturing bracket) and      /* We only need to continue for OP_CBRA (normal capturing bracket) and
1609      OP_BRA (normal non-capturing bracket) because the other variants of these      OP_BRA (normal non-capturing bracket) because the other variants of these
1610      opcodes are all concerned with unlimited repeated groups, which of course      opcodes are all concerned with unlimited repeated groups, which of course
1611      are not of fixed length. They will cause a -1 response from the default      are not of fixed length. */
     case of this switch. */  
1612    
1613      case OP_CBRA:      case OP_CBRA:
1614      case OP_BRA:      case OP_BRA:
1615      case OP_ONCE:      case OP_ONCE:
1616        case OP_ONCE_NC:
1617      case OP_COND:      case OP_COND:
1618      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);
1619      if (d < 0) return d;      if (d < 0) return d;
# Line 1514  for (;;) Line 1622  for (;;)
1622      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1623      break;      break;
1624    
1625      /* Reached end of a branch; if it's a ket it is the end of a nested      /* Reached end of a branch; if it's a ket it is the end of a nested call.
1626      call. If it's ALT it is an alternation in a nested call. If it is      If it's ALT it is an alternation in a nested call. An ACCEPT is effectively
1627      END it's the end of the outer call. All can be handled by the same code.      an ALT. If it is END it's the end of the outer call. All can be handled by
1628      Note that we must not include the OP_KETRxxx opcodes here, because they      the same code. Note that we must not include the OP_KETRxxx opcodes here,
1629      all imply an unlimited repeat. */      because they all imply an unlimited repeat. */
1630    
1631      case OP_ALT:      case OP_ALT:
1632      case OP_KET:      case OP_KET:
1633      case OP_END:      case OP_END:
1634        case OP_ACCEPT:
1635        case OP_ASSERT_ACCEPT:
1636      if (length < 0) length = branchlength;      if (length < 0) length = branchlength;
1637        else if (length != branchlength) return -1;        else if (length != branchlength) return -1;
1638      if (*cc != OP_ALT) return length;      if (*cc != OP_ALT) return length;
# Line 1536  for (;;) Line 1646  for (;;)
1646    
1647      case OP_RECURSE:      case OP_RECURSE:
1648      if (!atend) return -3;      if (!atend) return -3;
1649      cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1650      do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
1651      if (cc > cs && cc < ce) return -1;                /* Recursion */      if (cc > cs && cc < ce) return -1;                    /* Recursion */
1652      d = find_fixedlength(cs + 2, utf8, atend, cd);      d = find_fixedlength(cs + 2, utf8, atend, cd);
1653      if (d < 0) return d;      if (d < 0) return d;
1654      branchlength += d;      branchlength += d;
# Line 1556  for (;;) Line 1666  for (;;)
1666    
1667      /* Skip over things that don't match chars */      /* Skip over things that don't match chars */
1668    
1669      case OP_REVERSE:      case OP_MARK:
1670      case OP_CREF:      case OP_PRUNE_ARG:
1671      case OP_NCREF:      case OP_SKIP_ARG:
1672      case OP_RREF:      case OP_THEN_ARG:
1673      case OP_NRREF:      cc += cc[1] + PRIV(OP_lengths)[*cc];
1674      case OP_DEF:      break;
1675    
1676      case OP_CALLOUT:      case OP_CALLOUT:
     case OP_SOD:  
     case OP_SOM:  
     case OP_SET_SOM:  
     case OP_EOD:  
     case OP_EODN:  
1677      case OP_CIRC:      case OP_CIRC:
1678      case OP_CIRCM:      case OP_CIRCM:
1679        case OP_CLOSE:
1680        case OP_COMMIT:
1681        case OP_CREF:
1682        case OP_DEF:
1683      case OP_DOLL:      case OP_DOLL:
1684      case OP_DOLLM:      case OP_DOLLM:
1685        case OP_EOD:
1686        case OP_EODN:
1687        case OP_FAIL:
1688        case OP_NCREF:
1689        case OP_NRREF:
1690      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
1691        case OP_PRUNE:
1692        case OP_REVERSE:
1693        case OP_RREF:
1694        case OP_SET_SOM:
1695        case OP_SKIP:
1696        case OP_SOD:
1697        case OP_SOM:
1698        case OP_THEN:
1699      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
1700      cc += _pcre_OP_lengths[*cc];      cc += PRIV(OP_lengths)[*cc];
1701      break;      break;
1702    
1703      /* Handle literal characters */      /* Handle literal characters */
# Line 1586  for (;;) Line 1709  for (;;)
1709      branchlength++;      branchlength++;
1710      cc += 2;      cc += 2;
1711  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1712      if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
1713  #endif  #endif
1714      break;      break;
1715    
# Line 1594  for (;;) Line 1717  for (;;)
1717      need to skip over a multibyte character in UTF8 mode.  */      need to skip over a multibyte character in UTF8 mode.  */
1718    
1719      case OP_EXACT:      case OP_EXACT:
1720        case OP_EXACTI:
1721        case OP_NOTEXACT:
1722        case OP_NOTEXACTI:
1723      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1724      cc += 4;      cc += 4;
1725  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1726      if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
1727  #endif  #endif
1728      break;      break;
1729    
# Line 1614  for (;;) Line 1740  for (;;)
1740      cc += 2;      cc += 2;
1741      /* Fall through */      /* Fall through */
1742    
1743        case OP_HSPACE:
1744        case OP_VSPACE:
1745        case OP_NOT_HSPACE:
1746        case OP_NOT_VSPACE:
1747      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1748      case OP_DIGIT:      case OP_DIGIT:
1749      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
# Line 1626  for (;;) Line 1756  for (;;)
1756      cc++;      cc++;
1757      break;      break;
1758    
1759      /* The single-byte matcher isn't allowed */      /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
1760        otherwise \C is coded as OP_ALLANY. */
1761    
1762      case OP_ANYBYTE:      case OP_ANYBYTE:
1763      return -2;      return -2;
# Line 1645  for (;;) Line 1776  for (;;)
1776    
1777      switch (*cc)      switch (*cc)
1778        {        {
1779          case OP_CRPLUS:
1780          case OP_CRMINPLUS:
1781        case OP_CRSTAR:        case OP_CRSTAR:
1782        case OP_CRMINSTAR:        case OP_CRMINSTAR:
1783        case OP_CRQUERY:        case OP_CRQUERY:
# Line 1665  for (;;) Line 1798  for (;;)
1798    
1799      /* Anything else is variable length */      /* Anything else is variable length */
1800    
1801      default:      case OP_ANYNL:
1802        case OP_BRAMINZERO:
1803        case OP_BRAPOS:
1804        case OP_BRAPOSZERO:
1805        case OP_BRAZERO:
1806        case OP_CBRAPOS:
1807        case OP_EXTUNI:
1808        case OP_KETRMAX:
1809        case OP_KETRMIN:
1810        case OP_KETRPOS:
1811        case OP_MINPLUS:
1812        case OP_MINPLUSI:
1813        case OP_MINQUERY:
1814        case OP_MINQUERYI:
1815        case OP_MINSTAR:
1816        case OP_MINSTARI:
1817        case OP_MINUPTO:
1818        case OP_MINUPTOI:
1819        case OP_NOTMINPLUS:
1820        case OP_NOTMINPLUSI:
1821        case OP_NOTMINQUERY:
1822        case OP_NOTMINQUERYI:
1823        case OP_NOTMINSTAR:
1824        case OP_NOTMINSTARI:
1825        case OP_NOTMINUPTO:
1826        case OP_NOTMINUPTOI:
1827        case OP_NOTPLUS:
1828        case OP_NOTPLUSI:
1829        case OP_NOTPOSPLUS:
1830        case OP_NOTPOSPLUSI:
1831        case OP_NOTPOSQUERY:
1832        case OP_NOTPOSQUERYI:
1833        case OP_NOTPOSSTAR:
1834        case OP_NOTPOSSTARI:
1835        case OP_NOTPOSUPTO:
1836        case OP_NOTPOSUPTOI:
1837        case OP_NOTQUERY:
1838        case OP_NOTQUERYI:
1839        case OP_NOTSTAR:
1840        case OP_NOTSTARI:
1841        case OP_NOTUPTO:
1842        case OP_NOTUPTOI:
1843        case OP_PLUS:
1844        case OP_PLUSI:
1845        case OP_POSPLUS:
1846        case OP_POSPLUSI:
1847        case OP_POSQUERY:
1848        case OP_POSQUERYI:
1849        case OP_POSSTAR:
1850        case OP_POSSTARI:
1851        case OP_POSUPTO:
1852        case OP_POSUPTOI:
1853        case OP_QUERY:
1854        case OP_QUERYI:
1855        case OP_REF:
1856        case OP_REFI:
1857        case OP_SBRA:
1858        case OP_SBRAPOS:
1859        case OP_SCBRA:
1860        case OP_SCBRAPOS:
1861        case OP_SCOND:
1862        case OP_SKIPZERO:
1863        case OP_STAR:
1864        case OP_STARI:
1865        case OP_TYPEMINPLUS:
1866        case OP_TYPEMINQUERY:
1867        case OP_TYPEMINSTAR:
1868        case OP_TYPEMINUPTO:
1869        case OP_TYPEPLUS:
1870        case OP_TYPEPOSPLUS:
1871        case OP_TYPEPOSQUERY:
1872        case OP_TYPEPOSSTAR:
1873        case OP_TYPEPOSUPTO:
1874        case OP_TYPEQUERY:
1875        case OP_TYPESTAR:
1876        case OP_TYPEUPTO:
1877        case OP_UPTO:
1878        case OP_UPTOI:
1879      return -1;      return -1;
1880    
1881        /* Catch unrecognized opcodes so that when new ones are added they
1882        are not forgotten, as has happened in the past. */
1883    
1884        default:
1885        return -4;
1886      }      }
1887    }    }
1888  /* Control never gets here */  /* Control never gets here */
# Line 1693  Arguments: Line 1909  Arguments:
1909  Returns:      pointer to the opcode for the bracket, or NULL if not found  Returns:      pointer to the opcode for the bracket, or NULL if not found
1910  */  */
1911    
1912  const uschar *  const pcre_uchar *
1913  _pcre_find_bracket(const uschar *code, BOOL utf8, int number)  PRIV(find_bracket)(const pcre_uchar *code, BOOL utf8, int number)
1914  {  {
1915  for (;;)  for (;;)
1916    {    {
# Line 1712  for (;;) Line 1928  for (;;)
1928    
1929    else if (c == OP_REVERSE)    else if (c == OP_REVERSE)
1930      {      {
1931      if (number < 0) return (uschar *)code;      if (number < 0) return (pcre_uchar *)code;
1932      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
1933      }      }
1934    
1935    /* Handle capturing bracket */    /* Handle capturing bracket */
# Line 1722  for (;;) Line 1938  for (;;)
1938             c == OP_CBRAPOS || c == OP_SCBRAPOS)             c == OP_CBRAPOS || c == OP_SCBRAPOS)
1939      {      {
1940      int n = GET2(code, 1+LINK_SIZE);      int n = GET2(code, 1+LINK_SIZE);
1941      if (n == number) return (uschar *)code;      if (n == number) return (pcre_uchar *)code;
1942      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
1943      }      }
1944    
1945    /* Otherwise, we can get the item's length from the table, except that for    /* Otherwise, we can get the item's length from the table, except that for
# Line 1761  for (;;) Line 1977  for (;;)
1977        break;        break;
1978    
1979        case OP_THEN_ARG:        case OP_THEN_ARG:
1980        code += code[1+LINK_SIZE];        code += code[1];
1981        break;        break;
1982        }        }
1983    
1984      /* Add in the fixed length from the table */      /* Add in the fixed length from the table */
1985    
1986      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
1987    
1988    /* In UTF-8 mode, opcodes that are followed by a character may be followed by    /* In UTF-8 mode, opcodes that are followed by a character may be followed by
1989    a multi-byte character. The length in the table is a minimum, so we have to    a multi-byte character. The length in the table is a minimum, so we have to
# Line 1804  for (;;) Line 2020  for (;;)
2020        case OP_MINQUERYI:        case OP_MINQUERYI:
2021        case OP_POSQUERY:        case OP_POSQUERY:
2022        case OP_POSQUERYI:        case OP_POSQUERYI:
2023        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += PRIV(utf8_table4)[code[-1] & 0x3f];
2024        break;        break;
2025        }        }
2026  #else  #else
# Line 1830  Arguments: Line 2046  Arguments:
2046  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found
2047  */  */
2048    
2049  static const uschar *  static const pcre_uchar *
2050  find_recurse(const uschar *code, BOOL utf8)  find_recurse(const pcre_uchar *code, BOOL utf8)
2051  {  {
2052  for (;;)  for (;;)
2053    {    {
# Line 1880  for (;;) Line 2096  for (;;)
2096        break;        break;
2097    
2098        case OP_THEN_ARG:        case OP_THEN_ARG:
2099        code += code[1+LINK_SIZE];        code += code[1];
2100        break;        break;
2101        }        }
2102    
2103      /* Add in the fixed length from the table */      /* Add in the fixed length from the table */
2104    
2105      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
2106    
2107      /* In UTF-8 mode, opcodes that are followed by a character may be followed      /* In UTF-8 mode, opcodes that are followed by a character may be followed
2108      by a multi-byte character. The length in the table is a minimum, so we have      by a multi-byte character. The length in the table is a minimum, so we have
# Line 1923  for (;;) Line 2139  for (;;)
2139        case OP_MINQUERYI:        case OP_MINQUERYI:
2140        case OP_POSQUERY:        case OP_POSQUERY:
2141        case OP_POSQUERYI:        case OP_POSQUERYI:
2142        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += PRIV(utf8_table4)[code[-1] & 0x3f];
2143        break;        break;
2144        }        }
2145  #else  #else
# Line 1957  Returns:      TRUE if what is matched co Line 2173  Returns:      TRUE if what is matched co
2173  */  */
2174    
2175  static BOOL  static BOOL
2176  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2177    compile_data *cd)    BOOL utf8, compile_data *cd)
2178  {  {
2179  register int c;  register int c;
2180  for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
2181       code < endcode;       code < endcode;
2182       code = first_significant_code(code + _pcre_OP_lengths[c], TRUE))       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
2183    {    {
2184    const uschar *ccode;    const pcre_uchar *ccode;
2185    
2186    c = *code;    c = *code;
2187    
# Line 1988  for (code = first_significant_code(code Line 2204  for (code = first_significant_code(code
2204    
2205    if (c == OP_RECURSE)    if (c == OP_RECURSE)
2206      {      {
2207      const uschar *scode;      const pcre_uchar *scode;
2208      BOOL empty_branch;      BOOL empty_branch;
2209    
2210      /* Test for forward reference */      /* Test for forward reference */
# Line 2024  for (code = first_significant_code(code Line 2240  for (code = first_significant_code(code
2240    if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||    if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||
2241        c == OP_BRAPOSZERO)        c == OP_BRAPOSZERO)
2242      {      {
2243      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
2244      do code += GET(code, 1); while (*code == OP_ALT);      do code += GET(code, 1); while (*code == OP_ALT);
2245      c = *code;      c = *code;
2246      continue;      continue;
# Line 2045  for (code = first_significant_code(code Line 2261  for (code = first_significant_code(code
2261    
2262    if (c == OP_BRA  || c == OP_BRAPOS ||    if (c == OP_BRA  || c == OP_BRAPOS ||
2263        c == OP_CBRA || c == OP_CBRAPOS ||        c == OP_CBRA || c == OP_CBRAPOS ||
2264        c == OP_ONCE || c == OP_COND)        c == OP_ONCE || c == OP_ONCE_NC ||
2265          c == OP_COND)
2266      {      {
2267      BOOL empty_branch;      BOOL empty_branch;
2268      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
# Line 2079  for (code = first_significant_code(code Line 2296  for (code = first_significant_code(code
2296      {      {
2297      /* Check for quantifiers after a class. XCLASS is used for classes that      /* Check for quantifiers after a class. XCLASS is used for classes that
2298      cannot be represented just by a bit map. This includes negated single      cannot be represented just by a bit map. This includes negated single
2299      high-valued characters. The length in _pcre_OP_lengths[] is zero; the      high-valued characters. The length in PRIV(OP_lengths)[] is zero; the
2300      actual length is stored in the compiled code, so we must update "code"      actual length is stored in the compiled code, so we must update "code"
2301      here. */      here. */
2302    
# Line 2194  for (code = first_significant_code(code Line 2411  for (code = first_significant_code(code
2411      case OP_MINQUERYI:      case OP_MINQUERYI:
2412      case OP_POSQUERY:      case OP_POSQUERY:
2413      case OP_POSQUERYI:      case OP_POSQUERYI:
2414      if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];      if (utf8 && code[1] >= 0xc0) code += PRIV(utf8_table4)[code[1] & 0x3f];
2415      break;      break;
2416    
2417      case OP_UPTO:      case OP_UPTO:
# Line 2203  for (code = first_significant_code(code Line 2420  for (code = first_significant_code(code
2420      case OP_MINUPTOI:      case OP_MINUPTOI:
2421      case OP_POSUPTO:      case OP_POSUPTO:
2422      case OP_POSUPTOI:      case OP_POSUPTOI:
2423      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];      if (utf8 && code[3] >= 0xc0) code += PRIV(utf8_table4)[code[3] & 0x3f];
2424      break;      break;
2425  #endif  #endif
2426    
# Line 2217  for (code = first_significant_code(code Line 2434  for (code = first_significant_code(code
2434      break;      break;
2435    
2436      case OP_THEN_ARG:      case OP_THEN_ARG:
2437      code += code[1+LINK_SIZE];      code += code[1];
2438      break;      break;
2439    
2440      /* None of the remaining opcodes are required to match a character. */      /* None of the remaining opcodes are required to match a character. */
# Line 2254  Returns:      TRUE if what is matched co Line 2471  Returns:      TRUE if what is matched co
2471  */  */
2472    
2473  static BOOL  static BOOL
2474  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
2475    BOOL utf8, compile_data *cd)    branch_chain *bcptr, BOOL utf8, compile_data *cd)
2476  {  {
2477  while (bcptr != NULL && bcptr->current_branch >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2478    {    {
# Line 2295  I think. Line 2512  I think.
2512  A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.  A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
2513  It seems that the appearance of a nested POSIX class supersedes an apparent  It seems that the appearance of a nested POSIX class supersedes an apparent
2514  external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or  external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or
2515  a digit. Also, unescaped square brackets may also appear as part of class  a digit.
2516  names. For example, [:a[:abc]b:] gives unknown class "[:abc]b:]"in Perl.  
2517    In Perl, unescaped square brackets may also appear as part of class names. For
2518    example, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for
2519    [:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not
2520    seem right at all. PCRE does not allow closing square brackets in POSIX class
2521    names.
2522    
2523  Arguments:  Arguments:
2524    ptr      pointer to the initial [    ptr      pointer to the initial [
# Line 2306  Returns:   TRUE or FALSE Line 2528  Returns:   TRUE or FALSE
2528  */  */
2529    
2530  static BOOL  static BOOL
2531  check_posix_syntax(const uschar *ptr, const uschar **endptr)  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
2532  {  {
2533  int terminator;          /* Don't combine these lines; the Solaris cc */  int terminator;          /* Don't combine these lines; the Solaris cc */
2534  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
# Line 2314  for (++ptr; *ptr != 0; ptr++) Line 2536  for (++ptr; *ptr != 0; ptr++)
2536    {    {
2537    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2538      ptr++;      ptr++;
2539      else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
2540    else    else
2541      {      {
2542      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
# Line 2349  Returns:     a value representing the na Line 2572  Returns:     a value representing the na
2572  */  */
2573    
2574  static int  static int
2575  check_posix_name(const uschar *ptr, int len)  check_posix_name(const pcre_uchar *ptr, int len)
2576  {  {
2577  const char *pn = posix_names;  const char *pn = posix_names;
2578  register int yield = 0;  register int yield = 0;
2579  while (posix_name_lengths[yield] != 0)  while (posix_name_lengths[yield] != 0)
2580    {    {
2581    if (len == posix_name_lengths[yield] &&    if (len == posix_name_lengths[yield] &&
2582      strncmp((const char *)ptr, pn, len) == 0) return yield;      STRNCMP_UC_C8(ptr, pn, len) == 0) return yield;
2583    pn += posix_name_lengths[yield] + 1;    pn += posix_name_lengths[yield] + 1;
2584    yield++;    yield++;
2585    }    }
# Line 2396  Returns:     nothing Line 2619  Returns:     nothing
2619  */  */
2620    
2621  static void  static void
2622  adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf8, compile_data *cd,
2623    uschar *save_hwm)    pcre_uchar *save_hwm)
2624  {  {
2625  uschar *ptr = group;  pcre_uchar *ptr = group;
2626    
2627  while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf8)) != NULL)
2628    {    {
2629    int offset;    int offset;
2630    uschar *hc;    pcre_uchar *hc;
2631    
2632    /* See if this recursion is on the forward reference list. If so, adjust the    /* See if this recursion is on the forward reference list. If so, adjust the
2633    reference. */    reference. */
# Line 2449  Arguments: Line 2672  Arguments:
2672  Returns:         new code pointer  Returns:         new code pointer
2673  */  */
2674    
2675  static uschar *  static pcre_uchar *
2676  auto_callout(uschar *code, const uschar *ptr, compile_data *cd)  auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd)
2677  {  {
2678  *code++ = OP_CALLOUT;  *code++ = OP_CALLOUT;
2679  *code++ = 255;  *code++ = 255;
2680  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */
2681  PUT(code, LINK_SIZE, 0);                       /* Default length */  PUT(code, LINK_SIZE, 0);                       /* Default length */
2682  return code + 2*LINK_SIZE;  return code + 2 * LINK_SIZE;
2683  }  }
2684    
2685    
# Line 2478  Returns:             nothing Line 2701  Returns:             nothing
2701  */  */
2702    
2703  static void  static void
2704  complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)  complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd)
2705  {  {
2706  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
2707  PUT(previous_callout, 2 + LINK_SIZE, length);  PUT(previous_callout, 2 + LINK_SIZE, length);
# Line 2561  switch(ptype) Line 2784  switch(ptype)
2784            prop->chartype == ucp_Lt) == negated;            prop->chartype == ucp_Lt) == negated;
2785    
2786    case PT_GC:    case PT_GC:
2787    return (pdata == _pcre_ucp_gentype[prop->chartype]) == negated;    return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
2788    
2789    case PT_PC:    case PT_PC:
2790    return (pdata == prop->chartype) == negated;    return (pdata == prop->chartype) == negated;
# Line 2572  switch(ptype) Line 2795  switch(ptype)
2795    /* These are specials */    /* These are specials */
2796    
2797    case PT_ALNUM:    case PT_ALNUM:
2798    return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2799            _pcre_ucp_gentype[prop->chartype] == ucp_N) == negated;            PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
2800    
2801    case PT_SPACE:    /* Perl space */    case PT_SPACE:    /* Perl space */
2802    return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2803            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2804            == negated;            == negated;
2805    
2806    case PT_PXSPACE:  /* POSIX space */    case PT_PXSPACE:  /* POSIX space */
2807    return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2808            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2809            c == CHAR_FF || c == CHAR_CR)            c == CHAR_FF || c == CHAR_CR)
2810            == negated;            == negated;
2811    
2812    case PT_WORD:    case PT_WORD:
2813    return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2814            _pcre_ucp_gentype[prop->chartype] == ucp_N ||            PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2815            c == CHAR_UNDERSCORE) == negated;            c == CHAR_UNDERSCORE) == negated;
2816    }    }
2817  return FALSE;  return FALSE;
# Line 2616  Returns:        TRUE if possessifying is Line 2839  Returns:        TRUE if possessifying is
2839  */  */
2840    
2841  static BOOL  static BOOL
2842  check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr,  check_auto_possessive(const pcre_uchar *previous, BOOL utf8,
2843    int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
2844  {  {
2845  int c, next;  int c, next;
2846  int op_code = *previous++;  int op_code = *previous++;
# Line 2692  if ((options & PCRE_EXTENDED) != 0) Line 2915  if ((options & PCRE_EXTENDED) != 0)
2915  /* If the next thing is itself optional, we have to give up. */  /* If the next thing is itself optional, we have to give up. */
2916    
2917  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2918    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)    STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2919      return FALSE;      return FALSE;
2920    
2921  /* Now compare the next item with the previous opcode. First, handle cases when  /* Now compare the next item with the previous opcode. First, handle cases when
# Line 2954  switch(op_code) Line 3177  switch(op_code)
3177        to the original \d etc. At this point, ptr will point to a zero byte. */        to the original \d etc. At this point, ptr will point to a zero byte. */
3178    
3179        if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||        if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
3180          strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)          STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
3181            return FALSE;            return FALSE;
3182    
3183        /* Do the property check. */        /* Do the property check. */
# Line 3045  Returns:         TRUE on success Line 3268  Returns:         TRUE on success
3268  */  */
3269    
3270  static BOOL  static BOOL
3271  compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,  compile_branch(int *optionsptr, pcre_uchar **codeptr,
3272    int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    const pcre_uchar **ptrptr, int *errorcodeptr, int *firstbyteptr,
3273    int cond_depth, compile_data *cd, int *lengthptr)    int *reqbyteptr, branch_chain *bcptr, int cond_depth, compile_data *cd,
3274      int *lengthptr)
3275  {  {
3276  int repeat_type, op_type;  int repeat_type, op_type;
3277  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
# Line 3060  int options = *optionsptr; Line 3284  int options = *optionsptr;
3284  int after_manual_callout = 0;  int after_manual_callout = 0;
3285  int length_prevgroup = 0;  int length_prevgroup = 0;
3286  register int c;  register int c;
3287  register uschar *code = *codeptr;  register pcre_uchar *code = *codeptr;
3288  uschar *last_code = code;  pcre_uchar *last_code = code;
3289  uschar *orig_code = code;  pcre_uchar *orig_code = code;
3290  uschar *tempcode;  pcre_uchar *tempcode;
3291  BOOL inescq = FALSE;  BOOL inescq = FALSE;
3292  BOOL groupsetfirstbyte = FALSE;  BOOL groupsetfirstbyte = FALSE;
3293  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
3294  const uschar *tempptr;  const pcre_uchar *tempptr;
3295  const uschar *nestptr = NULL;  const pcre_uchar *nestptr = NULL;
3296  uschar *previous = NULL;  pcre_uchar *previous = NULL;
3297  uschar *previous_callout = NULL;  pcre_uchar *previous_callout = NULL;
3298  uschar *save_hwm = NULL;  pcre_uchar *save_hwm = NULL;
3299  uschar classbits[32];  pcre_uchar classbits[32];
3300    
3301  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
3302  must not do this for other options (e.g. PCRE_EXTENDED) because they may change  must not do this for other options (e.g. PCRE_EXTENDED) because they may change
# Line 3081  dynamically as we process the pattern. * Line 3305  dynamically as we process the pattern. *
3305  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3306  BOOL class_utf8;  BOOL class_utf8;
3307  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
3308  uschar *class_utf8data;  pcre_uint8 *class_utf8data;
3309  uschar *class_utf8data_base;  pcre_uint8 *class_utf8data_base;
3310  uschar utf8_char[6];  pcre_uint8 utf8_char[6];
3311  #else  #else
3312  BOOL utf8 = FALSE;  BOOL utf8 = FALSE;
 uschar *utf8_char = NULL;  
3313  #endif  #endif
3314    
3315  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 3137  for (;; ptr++) Line 3360  for (;; ptr++)
3360    int subfirstbyte;    int subfirstbyte;
3361    int terminator;    int terminator;
3362    int mclength;    int mclength;
3363    uschar mcbuffer[8];    int tempbracount;
3364      pcre_uchar mcbuffer[8];
3365    
3366    /* Get next byte in the pattern */    /* Get next byte in the pattern */
3367    
# Line 3184  for (;; ptr++) Line 3408  for (;; ptr++)
3408        }        }
3409    
3410      *lengthptr += (int)(code - last_code);      *lengthptr += (int)(code - last_code);
3411      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code - last_code),
3412          c));
3413    
3414      /* If "previous" is set and it is not at the start of the work space, move      /* If "previous" is set and it is not at the start of the work space, move
3415      it back to there, in order to avoid filling up the work space. Otherwise,      it back to there, in order to avoid filling up the work space. Otherwise,
# Line 3194  for (;; ptr++) Line 3419  for (;; ptr++)
3419        {        {
3420        if (previous > orig_code)        if (previous > orig_code)
3421          {          {
3422          memmove(orig_code, previous, code - previous);          memmove(orig_code, previous, IN_UCHARS(code - previous));
3423          code -= previous - orig_code;          code -= previous - orig_code;
3424          previous = orig_code;          previous = orig_code;
3425          }          }
# Line 3392  for (;; ptr++) Line 3617  for (;; ptr++)
3617          {          {
3618          if (ptr[1] == CHAR_E)          if (ptr[1] == CHAR_E)
3619            ptr++;            ptr++;
3620          else if (strncmp((const char *)ptr+1,          else if (STRNCMP_UC_C8(ptr + 1,
3621                            STR_Q STR_BACKSLASH STR_E, 3) == 0)                            STR_Q STR_BACKSLASH STR_E, 3) == 0)
3622            ptr += 3;            ptr += 3;
3623          else          else
# Line 3435  for (;; ptr++) Line 3660  for (;; ptr++)
3660      than 256), because in that case the compiled code doesn't use the bit map.      than 256), because in that case the compiled code doesn't use the bit map.
3661      */      */
3662    
3663      memset(classbits, 0, 32 * sizeof(uschar));      memset(classbits, 0, 32 * sizeof(pcre_uint8));
3664    
3665  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3666      class_utf8 = FALSE;                       /* No chars >= 256 */      class_utf8 = FALSE;                       /* No chars >= 256 */
# Line 3449  for (;; ptr++) Line 3674  for (;; ptr++)
3674    
3675      if (c != 0) do      if (c != 0) do
3676        {        {
3677        const uschar *oldptr;        const pcre_uchar *oldptr;
3678    
3679  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3680        if (utf8 && c > 127)        if (utf8 && c > 127)
# Line 3495  for (;; ptr++) Line 3720  for (;; ptr++)
3720          {          {
3721          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3722          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3723          register const uschar *cbits = cd->cbits;          register const pcre_uint8 *cbits = cd->cbits;
3724          uschar pbits[32];          pcre_uint8 pbits[32];
3725    
3726          if (ptr[1] != CHAR_COLON)          if (ptr[1] != CHAR_COLON)
3727            {            {
# Line 3551  for (;; ptr++) Line 3776  for (;; ptr++)
3776          /* Copy in the first table (always present) */          /* Copy in the first table (always present) */
3777    
3778          memcpy(pbits, cbits + posix_class_maps[posix_class],          memcpy(pbits, cbits + posix_class_maps[posix_class],
3779            32 * sizeof(uschar));            32 * sizeof(pcre_uint8));
3780    
3781          /* If there is a second table, add or remove it as required. */          /* If there is a second table, add or remove it as required. */
3782    
# Line 3613  for (;; ptr++) Line 3838  for (;; ptr++)
3838    
3839          if (c < 0)          if (c < 0)
3840            {            {
3841            register const uschar *cbits = cd->cbits;            register const pcre_uint8 *cbits = cd->cbits;
3842            class_charcount += 2;     /* Greater than 1 is what matters */            class_charcount += 2;     /* Greater than 1 is what matters */
3843    
3844            switch (-c)            switch (-c)
# Line 3673  for (;; ptr++) Line 3898  for (;; ptr++)
3898                {                {
3899                class_utf8 = TRUE;                class_utf8 = TRUE;
3900                *class_utf8data++ = XCL_SINGLE;                *class_utf8data++ = XCL_SINGLE;
3901                class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x1680, class_utf8data);
3902                *class_utf8data++ = XCL_SINGLE;                *class_utf8data++ = XCL_SINGLE;
3903                class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x180e, class_utf8data);
3904                *class_utf8data++ = XCL_RANGE;                *class_utf8data++ = XCL_RANGE;
3905                class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x2000, class_utf8data);
3906                class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x200A, class_utf8data);
3907                *class_utf8data++ = XCL_SINGLE;                *class_utf8data++ = XCL_SINGLE;
3908                class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x202f, class_utf8data);
3909                *class_utf8data++ = XCL_SINGLE;                *class_utf8data++ = XCL_SINGLE;
3910                class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x205f, class_utf8data);
3911                *class_utf8data++ = XCL_SINGLE;                *class_utf8data++ = XCL_SINGLE;
3912                class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x3000, class_utf8data);
3913                }                }
3914  #endif  #endif
3915              continue;              continue;
# Line 3708  for (;; ptr++) Line 3933  for (;; ptr++)
3933                {                {
3934                class_utf8 = TRUE;                class_utf8 = TRUE;
3935                *class_utf8data++ = XCL_RANGE;                *class_utf8data++ = XCL_RANGE;
3936                class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x0100, class_utf8data);
3937                class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x167f, class_utf8data);
3938                *class_utf8data++ = XCL_RANGE;                *class_utf8data++ = XCL_RANGE;
3939                class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x1681, class_utf8data);
3940                class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x180d, class_utf8data);
3941                *class_utf8data++ = XCL_RANGE;                *class_utf8data++ = XCL_RANGE;
3942                class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x180f, class_utf8data);
3943                class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x1fff, class_utf8data);
3944                *class_utf8data++ = XCL_RANGE;                *class_utf8data++ = XCL_RANGE;
3945                class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x200B, class_utf8data);
3946                class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x202e, class_utf8data);
3947                *class_utf8data++ = XCL_RANGE;                *class_utf8data++ = XCL_RANGE;
3948                class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x2030, class_utf8data);
3949                class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x205e, class_utf8data);
3950                *class_utf8data++ = XCL_RANGE;                *class_utf8data++ = XCL_RANGE;
3951                class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x2060, class_utf8data);
3952                class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x2fff, class_utf8data);
3953                *class_utf8data++ = XCL_RANGE;                *class_utf8data++ = XCL_RANGE;
3954                class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x3001, class_utf8data);
3955                class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x7fffffff, class_utf8data);
3956                }                }
3957  #endif  #endif
3958              continue;              continue;
# Line 3743  for (;; ptr++) Line 3968  for (;; ptr++)
3968                {                {
3969                class_utf8 = TRUE;                class_utf8 = TRUE;
3970                *class_utf8data++ = XCL_RANGE;                *class_utf8data++ = XCL_RANGE;
3971                class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x2028, class_utf8data);
3972                class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x2029, class_utf8data);
3973                }                }
3974  #endif  #endif
3975              continue;              continue;
# Line 3771  for (;; ptr++) Line 3996  for (;; ptr++)
3996                {                {
3997                class_utf8 = TRUE;                class_utf8 = TRUE;
3998                *class_utf8data++ = XCL_RANGE;                *class_utf8data++ = XCL_RANGE;
3999                class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x0100, class_utf8data);
4000                class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x2027, class_utf8data);
4001                *class_utf8data++ = XCL_RANGE;                *class_utf8data++ = XCL_RANGE;
4002                class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x2029, class_utf8data);
4003                class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);                class_utf8data += PRIV(ord2utf8)(0x7fffffff, class_utf8data);
4004                }                }
4005  #endif  #endif
4006              continue;              continue;
# Line 3953  for (;; ptr++) Line 4178  for (;; ptr++)
4178                else                else
4179                  {                  {
4180                  *class_utf8data++ = XCL_RANGE;                  *class_utf8data++ = XCL_RANGE;
4181                  class_utf8data += _pcre_ord2utf8(occ, class_utf8data);                  class_utf8data += PRIV(ord2utf8)(occ, class_utf8data);
4182                  }                  }
4183                class_utf8data += _pcre_ord2utf8(ocd, class_utf8data);                class_utf8data += PRIV(ord2utf8)(ocd, class_utf8data);
4184                }                }
4185              }              }
4186  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 3964  for (;; ptr++) Line 4189  for (;; ptr++)
4189            overlapping ranges. */            overlapping ranges. */
4190    
4191            *class_utf8data++ = XCL_RANGE;            *class_utf8data++ = XCL_RANGE;
4192            class_utf8data += _pcre_ord2utf8(c, class_utf8data);            class_utf8data += PRIV(ord2utf8)(c, class_utf8data);
4193            class_utf8data += _pcre_ord2utf8(d, class_utf8data);            class_utf8data += PRIV(ord2utf8)(d, class_utf8data);
4194    
4195            /* With UCP support, we are done. Without UCP support, there is no            /* With UCP support, we are done. Without UCP support, there is no
4196            caseless matching for UTF-8 characters > 127; we can use the bit map            caseless matching for UTF-8 characters > 127; we can use the bit map
# Line 4019  for (;; ptr++) Line 4244  for (;; ptr++)
4244          {          {
4245          class_utf8 = TRUE;          class_utf8 = TRUE;
4246          *class_utf8data++ = XCL_SINGLE;          *class_utf8data++ = XCL_SINGLE;
4247          class_utf8data += _pcre_ord2utf8(c, class_utf8data);          class_utf8data += PRIV(ord2utf8)(c, class_utf8data);
4248    
4249  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4250          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
# Line 4028  for (;; ptr++) Line 4253  for (;; ptr++)
4253            if ((othercase = UCD_OTHERCASE(c)) != c)            if ((othercase = UCD_OTHERCASE(c)) != c)
4254              {              {
4255              *class_utf8data++ = XCL_SINGLE;              *class_utf8data++ = XCL_SINGLE;
4256              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);              class_utf8data += PRIV(ord2utf8)(othercase, class_utf8data);
4257              }              }
4258            }            }
4259  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 4109  for (;; ptr++) Line 4334  for (;; ptr++)
4334    
4335  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4336        if (utf8 && class_lastchar > 127)        if (utf8 && class_lastchar > 127)
4337          mclength = _pcre_ord2utf8(class_lastchar, mcbuffer);          mclength = PRIV(ord2utf8)(class_lastchar, mcbuffer);
4338        else        else
4339  #endif  #endif
4340          {          {
# Line 4256  for (;; ptr++) Line 4481  for (;; ptr++)
4481      past, but it no longer happens for non-repeated recursions. In fact, the      past, but it no longer happens for non-repeated recursions. In fact, the
4482      repeated ones could be re-implemented independently so as not to need this,      repeated ones could be re-implemented independently so as not to need this,
4483      but for the moment we rely on the code for repeating groups. */      but for the moment we rely on the code for repeating groups. */
4484    
4485      if (*previous == OP_RECURSE)      if (*previous == OP_RECURSE)
4486        {        {
4487        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);        memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE));
4488        *previous = OP_ONCE;        *previous = OP_ONCE;
4489        PUT(previous, 1, 2 + 2*LINK_SIZE);        PUT(previous, 1, 2 + 2*LINK_SIZE);
4490        previous[2 + 2*LINK_SIZE] = OP_KET;        previous[2 + 2*LINK_SIZE] = OP_KET;
# Line 4298  for (;; ptr++) Line 4523  for (;; ptr++)
4523  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4524        if (utf8 && (code[-1] & 0x80) != 0)        if (utf8 && (code[-1] & 0x80) != 0)
4525          {          {
4526          uschar *lastchar = code - 1;          pcre_uchar *lastchar = code - 1;
4527          while((*lastchar & 0xc0) == 0x80) lastchar--;          while((*lastchar & 0xc0) == 0x80) lastchar--;
4528          c = code - lastchar;            /* Length of UTF-8 character */          c = code - lastchar;            /* Length of UTF-8 character */
4529          memcpy(utf8_char, lastchar, c); /* Save the char */          memcpy(utf8_char, lastchar, c); /* Save the char */
# Line 4360  for (;; ptr++) Line 4585  for (;; ptr++)
4585    
4586      else if (*previous < OP_EODN)      else if (*previous < OP_EODN)
4587        {        {
4588        uschar *oldcode;        pcre_uchar *oldcode;
4589        int prop_type, prop_value;        int prop_type, prop_value;
4590        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
4591        c = *previous;        c = *previous;
# Line 4582  for (;; ptr++) Line 4807  for (;; ptr++)
4807        {        {
4808        register int i;        register int i;
4809        int len = (int)(code - previous);        int len = (int)(code - previous);
4810        uschar *bralink = NULL;        pcre_uchar *bralink = NULL;
4811        uschar *brazeroptr = NULL;        pcre_uchar *brazeroptr = NULL;
4812    
4813        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
4814        we just ignore the repeat. */        we just ignore the repeat. */
# Line 4637  for (;; ptr++) Line 4862  for (;; ptr++)
4862            {            {
4863            *code = OP_END;            *code = OP_END;
4864            adjust_recurse(previous, 1, utf8, cd, save_hwm);            adjust_recurse(previous, 1, utf8, cd, save_hwm);
4865            memmove(previous+1, previous, len);            memmove(previous + 1, previous, IN_UCHARS(len));
4866            code++;            code++;
4867            if (repeat_max == 0)            if (repeat_max == 0)
4868              {              {
# Line 4661  for (;; ptr++) Line 4886  for (;; ptr++)
4886            int offset;            int offset;
4887            *code = OP_END;            *code = OP_END;
4888            adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);            adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);
4889            memmove(previous + 2 + LINK_SIZE, previous, len);            memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
4890            code += 2 + LINK_SIZE;            code += 2 + LINK_SIZE;
4891            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
4892            *previous++ = OP_BRA;            *previous++ = OP_BRA;
# Line 4714  for (;; ptr++) Line 4939  for (;; ptr++)
4939              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
4940              for (i = 1; i < repeat_min; i++)              for (i = 1; i < repeat_min; i++)
4941                {                {
4942                uschar *hc;                pcre_uchar *hc;
4943                uschar *this_hwm = cd->hwm;                pcre_uchar *this_hwm = cd->hwm;
4944                memcpy(code, previous, len);                memcpy(code, previous, IN_UCHARS(len));
4945                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
4946                  {                  {
4947                  PUT(cd->hwm, 0, GET(hc, 0) + len);                  PUT(cd->hwm, 0, GET(hc, 0) + len);
# Line 4766  for (;; ptr++) Line 4991  for (;; ptr++)
4991    
4992          else for (i = repeat_max - 1; i >= 0; i--)          else for (i = repeat_max - 1; i >= 0; i--)
4993            {            {
4994            uschar *hc;            pcre_uchar *hc;
4995            uschar *this_hwm = cd->hwm;            pcre_uchar *this_hwm = cd->hwm;
4996    
4997            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
4998    
# Line 4783  for (;; ptr++) Line 5008  for (;; ptr++)
5008              PUTINC(code, 0, offset);              PUTINC(code, 0, offset);
5009              }              }
5010    
5011            memcpy(code, previous, len);            memcpy(code, previous, IN_UCHARS(len));
5012            for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)            for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
5013              {              {
5014              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
# Line 4800  for (;; ptr++) Line 5025  for (;; ptr++)
5025            {            {
5026            int oldlinkoffset;            int oldlinkoffset;
5027            int offset = (int)(code - bralink + 1);            int offset = (int)(code - bralink + 1);
5028            uschar *bra = code - offset;            pcre_uchar *bra = code - offset;
5029            oldlinkoffset = GET(bra, 1);            oldlinkoffset = GET(bra, 1);
5030            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
5031            *code++ = OP_KET;            *code++ = OP_KET;
# Line 4814  for (;; ptr++) Line 5039  for (;; ptr++)
5039        ONCE brackets can be converted into non-capturing brackets, as the        ONCE brackets can be converted into non-capturing brackets, as the
5040        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
5041        deal with possessive ONCEs specially.        deal with possessive ONCEs specially.
5042    
5043        Otherwise, if the quantifier was possessive, we convert the BRA code to        Otherwise, when we are doing the actual compile phase, check to see
5044        the POS form, and the KET code to KETRPOS. (It turns out to be convenient        whether this group is one that could match an empty string. If so,
5045        at runtime to detect this kind of subpattern at both the start and at the        convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
5046        end.) The use of special opcodes makes it possible to reduce greatly the        that runtime checking can be done. [This check is also applied to ONCE
5047        stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO,        groups at runtime, but in a different way.]
5048        convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that  
5049        the default action below, of wrapping everything inside atomic brackets,        Then, if the quantifier was possessive and the bracket is not a
5050        does not happen.        conditional, we convert the BRA code to the POS form, and the KET code to
5051          KETRPOS. (It turns out to be convenient at runtime to detect this kind of
5052        Then, when we are doing the actual compile phase, check to see whether        subpattern at both the start and at the end.) The use of special opcodes
5053        this group is one that could match an empty string. If so, convert the        makes it possible to reduce greatly the stack usage in pcre_exec(). If
5054        initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
5055        checking can be done. [This check is also applied to ONCE groups at  
5056        runtime, but in a different way.] */        Then, if the minimum number of matches is 1 or 0, cancel the possessive
5057          flag so that the default action below, of wrapping everything inside
5058          atomic brackets, does not happen. When the minimum is greater than 1,
5059          there will be earlier copies of the group, and so we still have to wrap
5060          the whole thing. */
5061    
5062        else        else
5063          {          {
5064          uschar *ketcode = code - 1 - LINK_SIZE;          pcre_uchar *ketcode = code - 1 - LINK_SIZE;
5065          uschar *bracode = ketcode - GET(ketcode, 1);          pcre_uchar *bracode = ketcode - GET(ketcode, 1);
5066    
5067          if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;          /* Convert possessive ONCE brackets to non-capturing */
5068          if (*bracode == OP_ONCE)  
5069            if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
5070                possessive_quantifier) *bracode = OP_BRA;
5071    
5072            /* For non-possessive ONCE brackets, all we need to do is to
5073            set the KET. */
5074    
5075            if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
5076            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
5077    
5078            /* Handle non-ONCE brackets and possessive ONCEs (which have been
5079            converted to non-capturing above). */
5080    
5081          else          else
5082            {            {
5083            if (possessive_quantifier)            /* In the compile phase, check for empty string matching. */
5084              {  
             *bracode += 1;                   /* Switch to xxxPOS opcodes */  
             *ketcode = OP_KETRPOS;  
             if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;  
             possessive_quantifier = FALSE;  
             }  
           else *ketcode = OP_KETRMAX + repeat_type;  
   
5085            if (lengthptr == NULL)            if (lengthptr == NULL)
5086              {              {
5087              uschar *scode = bracode;              pcre_uchar *scode = bracode;
5088              do              do
5089                {                {
5090                if (could_be_empty_branch(scode, ketcode, utf8, cd))                if (could_be_empty_branch(scode, ketcode, utf8, cd))
# Line 4863  for (;; ptr++) Line 5096  for (;; ptr++)
5096                }                }
5097              while (*scode == OP_ALT);              while (*scode == OP_ALT);
5098              }              }
5099    
5100              /* Handle possessive quantifiers. */
5101    
5102              if (possessive_quantifier)
5103                {
5104                /* For COND brackets, we wrap the whole thing in a possessively
5105                repeated non-capturing bracket, because we have not invented POS
5106                versions of the COND opcodes. Because we are moving code along, we
5107                must ensure that any pending recursive references are updated. */
5108    
5109                if (*bracode == OP_COND || *bracode == OP_SCOND)
5110                  {
5111                  int nlen = (int)(code - bracode);
5112                  *code = OP_END;
5113                  adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5114                  memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
5115                  code += 1 + LINK_SIZE;
5116                  nlen += 1 + LINK_SIZE;
5117                  *bracode = OP_BRAPOS;
5118                  *code++ = OP_KETRPOS;
5119                  PUTINC(code, 0, nlen);
5120                  PUT(bracode, 1, nlen);
5121                  }
5122    
5123                /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
5124    
5125                else
5126                  {
5127                  *bracode += 1;              /* Switch to xxxPOS opcodes */
5128                  *ketcode = OP_KETRPOS;
5129                  }
5130    
5131                /* If the minimum is zero, mark it as possessive, then unset the
5132                possessive flag when the minimum is 0 or 1. */
5133    
5134                if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
5135                if (repeat_min < 2) possessive_quantifier = FALSE;
5136                }
5137    
5138              /* Non-possessive quantifier */
5139    
5140              else *ketcode = OP_KETRMAX + repeat_type;
5141            }            }
5142          }          }
5143        }        }
# Line 4889  for (;; ptr++) Line 5164  for (;; ptr++)
5164      notation is just syntactic sugar, taken from Sun's Java package, but the      notation is just syntactic sugar, taken from Sun's Java package, but the
5165      special opcodes can optimize it.      special opcodes can optimize it.
5166    
5167      Possessively repeated subpatterns have already been handled in the code      Some (but not all) possessively repeated subpatterns have already been
5168      just above, so possessive_quantifier is always FALSE for them at this      completely handled in the code just above. For them, possessive_quantifier
5169      stage.      is always FALSE at this stage.
5170    
5171      Note that the repeated item starts at tempcode, not at previous, which      Note that the repeated item starts at tempcode, not at previous, which
5172      might be the first part of a string whose (former) last char we repeated.      might be the first part of a string whose (former) last char we repeated.
# Line 4905  for (;; ptr++) Line 5180  for (;; ptr++)
5180        int len;        int len;
5181    
5182        if (*tempcode == OP_TYPEEXACT)        if (*tempcode == OP_TYPEEXACT)
5183          tempcode += _pcre_OP_lengths[*tempcode] +          tempcode += PRIV(OP_lengths)[*tempcode] +
5184            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
5185    
5186        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
5187          {          {
5188          tempcode += _pcre_OP_lengths[*tempcode];          tempcode += PRIV(OP_lengths)[*tempcode];
5189  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5190          if (utf8 && tempcode[-1] >= 0xc0)          if (utf8 && tempcode[-1] >= 0xc0)
5191            tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];            tempcode += PRIV(utf8_table4)[tempcode[-1] & 0x3f];
5192  #endif  #endif
5193          }          }
5194    
# Line 4951  for (;; ptr++) Line 5226  for (;; ptr++)
5226          default:          default:
5227          *code = OP_END;          *code = OP_END;
5228          adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);          adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5229          memmove(tempcode + 1+LINK_SIZE, tempcode, len);          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
5230          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
5231          len += 1 + LINK_SIZE;          len += 1 + LINK_SIZE;
5232          tempcode[0] = OP_ONCE;          tempcode[0] = OP_ONCE;
# Line 4992  for (;; ptr++) Line 5267  for (;; ptr++)
5267        int i, namelen;        int i, namelen;
5268        int arglen = 0;        int arglen = 0;
5269        const char *vn = verbnames;        const char *vn = verbnames;
5270        const uschar *name = ptr + 1;        const pcre_uchar *name = ptr + 1;
5271        const uschar *arg = NULL;        const pcre_uchar *arg = NULL;
5272        previous = NULL;        previous = NULL;
5273        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
5274        namelen = (int)(ptr - name);        namelen = (int)(ptr - name);
# Line 5020  for (;; ptr++) Line 5295  for (;; ptr++)
5295        for (i = 0; i < verbcount; i++)        for (i = 0; i < verbcount; i++)
5296          {          {
5297          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
5298              strncmp((char *)name, vn, namelen) == 0)              STRNCMP_UC_C8(name, vn, namelen) == 0)
5299            {            {
5300            /* Check for open captures before ACCEPT and convert it to            /* Check for open captures before ACCEPT and convert it to
5301            ASSERT_ACCEPT if in an assertion. */            ASSERT_ACCEPT if in an assertion. */
# Line 5040  for (;; ptr++) Line 5315  for (;; ptr++)
5315                PUT2INC(code, 0, oc->number);                PUT2INC(code, 0, oc->number);
5316                }                }
5317              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
5318    
5319                /* Do not set firstbyte after *ACCEPT */
5320                if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
5321              }              }
5322    
5323            /* Handle other cases with/without an argument */            /* Handle other cases with/without an argument */
# Line 5052  for (;; ptr++) Line 5330  for (;; ptr++)
5330                goto FAILED;                goto FAILED;
5331                }                }
5332              *code = verbs[i].op;              *code = verbs[i].op;
5333              if (*code++ == OP_THEN)              if (*code++ == OP_THEN) cd->external_flags |= PCRE_HASTHEN;
               {  
               PUT(code, 0, code - bcptr->current_branch - 1);  
               code += LINK_SIZE;  
               }  
5334              }              }
5335    
5336            else            else
# Line 5067  for (;; ptr++) Line 5341  for (;; ptr++)
5341                goto FAILED;                goto FAILED;
5342                }                }
5343              *code = verbs[i].op_arg;              *code = verbs[i].op_arg;
5344              if (*code++ == OP_THEN_ARG)              if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;
               {  
               PUT(code, 0, code - bcptr->current_branch - 1);  
               code += LINK_SIZE;  
               }  
5345              *code++ = arglen;              *code++ = arglen;
5346              memcpy(code, arg, arglen);              memcpy(code, arg, IN_UCHARS(arglen));
5347              code += arglen;              code += arglen;
5348              *code++ = 0;              *code++ = 0;
5349              }              }
# Line 5096  for (;; ptr++) Line 5366  for (;; ptr++)
5366        {        {
5367        int i, set, unset, namelen;        int i, set, unset, namelen;
5368        int *optset;        int *optset;
5369        const uschar *name;        const pcre_uchar *name;
5370        uschar *slot;        pcre_uchar *slot;
5371    
5372        switch (*(++ptr))        switch (*(++ptr))
5373          {          {
# Line 5249  for (;; ptr++) Line 5519  for (;; ptr++)
5519          slot = cd->name_table;          slot = cd->name_table;
5520          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
5521            {            {
5522            if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;            if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break;
5523            slot += cd->name_entry_size;            slot += cd->name_entry_size;
5524            }            }
5525    
# Line 5306  for (;; ptr++) Line 5576  for (;; ptr++)
5576          /* Similarly, check for the (?(DEFINE) "condition", which is always          /* Similarly, check for the (?(DEFINE) "condition", which is always
5577          false. */          false. */
5578    
5579          else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)          else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0)
5580            {            {
5581            code[1+LINK_SIZE] = OP_DEF;            code[1+LINK_SIZE] = OP_DEF;
5582            skipbytes = 1;            skipbytes = 1;
# Line 5386  for (;; ptr++) Line 5656  for (;; ptr++)
5656    
5657          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5658          case CHAR_C:                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
5659          previous_callout = code;  /* Save for later completion */          previous_callout = code;     /* Save for later completion */
5660          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1;    /* Skip one item before completing */
5661          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
5662            {            {
5663            int n = 0;            int n = 0;
# Line 5509  for (;; ptr++) Line 5779  for (;; ptr++)
5779                if (crc < 0)                if (crc < 0)
5780                  {                  {
5781                  memmove(slot + cd->name_entry_size, slot,                  memmove(slot + cd->name_entry_size, slot,
5782                    (cd->names_found - i) * cd->name_entry_size);                    IN_UCHARS((cd->names_found - i) * cd->name_entry_size));
5783                  break;                  break;
5784                  }                  }
5785    
# Line 5523  for (;; ptr++) Line 5793  for (;; ptr++)
5793    
5794              if (!dupname)              if (!dupname)
5795                {                {
5796                uschar *cslot = cd->name_table;                pcre_uchar *cslot = cd->name_table;
5797                for (i = 0; i < cd->names_found; i++)                for (i = 0; i < cd->names_found; i++)
5798                  {                  {
5799                  if (cslot != slot)                  if (cslot != slot)
# Line 5540  for (;; ptr++) Line 5810  for (;; ptr++)
5810                }                }
5811    
5812              PUT2(slot, 0, cd->bracount + 1);              PUT2(slot, 0, cd->bracount + 1);
5813              memcpy(slot + 2, name, namelen);              memcpy(slot + 2, name, IN_UCHARS(namelen));
5814              slot[2+namelen] = 0;              slot[2 + namelen] = 0;
5815              }              }
5816            }            }
5817    
# Line 5579  for (;; ptr++) Line 5849  for (;; ptr++)
5849    
5850          if (lengthptr != NULL)          if (lengthptr != NULL)
5851            {            {
5852            const uschar *temp;            const pcre_uchar *temp;
5853    
5854            if (namelen == 0)            if (namelen == 0)
5855              {              {
# Line 5624  for (;; ptr++) Line 5894  for (;; ptr++)
5894            slot = cd->name_table;            slot = cd->name_table;
5895            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
5896              {              {
5897              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&              if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&
5898                  slot[2+namelen] == 0)                  slot[2+namelen] == 0)
5899                break;                break;
5900              slot += cd->name_entry_size;              slot += cd->name_entry_size;
# Line 5661  for (;; ptr++) Line 5931  for (;; ptr++)
5931          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
5932          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
5933            {            {
5934            const uschar *called;            const pcre_uchar *called;
5935            terminator = CHAR_RIGHT_PARENTHESIS;            terminator = CHAR_RIGHT_PARENTHESIS;
5936    
5937            /* Come here from the \g<...> and \g'...' code (Oniguruma            /* Come here from the \g<...> and \g'...' code (Oniguruma
# Line 5740  for (;; ptr++) Line 6010  for (;; ptr++)
6010              {              {
6011              *code = OP_END;              *code = OP_END;
6012              if (recno != 0)              if (recno != 0)
6013                called = _pcre_find_bracket(cd->start_code, utf8, recno);                called = PRIV(find_bracket)(cd->start_code, utf8, recno);
6014    
6015              /* Forward reference */              /* Forward reference */
6016    
# Line 5906  for (;; ptr++) Line 6176  for (;; ptr++)
6176      *code = bravalue;      *code = bravalue;
6177      tempcode = code;      tempcode = code;
6178      tempreqvary = cd->req_varyopt;        /* Save value before bracket */      tempreqvary = cd->req_varyopt;        /* Save value before bracket */
6179        tempbracount = cd->bracount;          /* Save value before bracket */
6180      length_prevgroup = 0;                 /* Initialize for pre-compile phase */      length_prevgroup = 0;                 /* Initialize for pre-compile phase */
6181    
6182      if (!compile_regex(      if (!compile_regex(
# Line 5928  for (;; ptr++) Line 6199  for (;; ptr++)
6199           ))           ))
6200        goto FAILED;        goto FAILED;
6201    
6202        /* If this was an atomic group and there are no capturing groups within it,
6203        generate OP_ONCE_NC instead of OP_ONCE. */
6204    
6205        if (bravalue == OP_ONCE && cd->bracount <= tempbracount)
6206          *code = OP_ONCE_NC;
6207    
6208      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
6209        cd->assert_depth -= 1;        cd->assert_depth -= 1;
6210    
6211      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
6212      group, while tempcode has been updated to point past the end of the group      group, while tempcode has been updated to point past the end of the group.
6213      and any option resetting that may follow it. The pattern pointer (ptr)      The pattern pointer (ptr) is on the bracket.
     is on the bracket. */  
6214    
6215      /* If this is a conditional bracket, check that there are no more than      If this is a conditional bracket, check that there are no more than
6216      two branches in the group, or just one if it's a DEFINE group. We do this      two branches in the group, or just one if it's a DEFINE group. We do this
6217      in the real compile phase, not in the pre-pass, where the whole group may      in the real compile phase, not in the pre-pass, where the whole group may
6218      not be available. */      not be available. */
6219    
6220      if (bravalue == OP_COND && lengthptr == NULL)      if (bravalue == OP_COND && lengthptr == NULL)
6221        {        {
6222        uschar *tc = code;        pcre_uchar *tc = code;
6223        int condcount = 0;        int condcount = 0;
6224    
6225        do {        do {
# Line 6118  for (;; ptr++) Line 6394  for (;; ptr++)
6394    
6395        if (-c == ESC_g)        if (-c == ESC_g)
6396          {          {
6397          const uschar *p;          const pcre_uchar *p;
6398          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
6399          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
6400            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
# Line 6258  for (;; ptr++) Line 6534  for (;; ptr++)
6534            }            }
6535          else          else
6536  #endif  #endif
6537            {          /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE
6538            so that it works in DFA mode and in lookbehinds. */
6539    
6540              {
6541            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
6542            *code++ = -c;            *code++ = (!utf8 && c == -ESC_C)? OP_ALLANY : -c;
6543            }            }
6544          }          }
6545        continue;        continue;
# Line 6272  for (;; ptr++) Line 6551  for (;; ptr++)
6551    
6552  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6553      if (utf8 && c > 127)      if (utf8 && c > 127)
6554        mclength = _pcre_ord2utf8(c, mcbuffer);        mclength = PRIV(ord2utf8)(c, mcbuffer);
6555      else      else
6556  #endif  #endif
6557    
# Line 6335  for (;; ptr++) Line 6614  for (;; ptr++)
6614        else firstbyte = reqbyte = REQ_NONE;        else firstbyte = reqbyte = REQ_NONE;
6615        }        }
6616    
6617      /* firstbyte was previously set; we can set reqbyte only the length is      /* firstbyte was previously set; we can set reqbyte only if the length is
6618      1 or the matching is caseful. */      1 or the matching is caseful. */
6619    
6620      else      else
# Line 6394  Returns:         TRUE on success Line 6673  Returns:         TRUE on success
6673  */  */
6674    
6675  static BOOL  static BOOL
6676  compile_regex(int options, uschar **codeptr, const uschar **ptrptr,  compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
6677    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
6678    int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
6679    compile_data *cd, int *lengthptr)    compile_data *cd, int *lengthptr)
6680  {  {
6681  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
6682  uschar *code = *codeptr;  pcre_uchar *code = *codeptr;
6683  uschar *last_branch = code;  pcre_uchar *last_branch = code;
6684  uschar *start_bracket = code;  pcre_uchar *start_bracket = code;
6685  uschar *reverse_count = NULL;  pcre_uchar *reverse_count = NULL;
6686  open_capitem capitem;  open_capitem capitem;
6687  int capnumber = 0;  int capnumber = 0;
6688  int firstbyte, reqbyte;  int firstbyte, reqbyte;
# Line 6552  for (;;) Line 6831  for (;;)
6831          }          }
6832        else if (fixed_length < 0)        else if (fixed_length < 0)
6833          {          {
6834          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;          *errorcodeptr = (fixed_length == -2)? ERR36 :
6835                            (fixed_length == -4)? ERR70: ERR25;
6836          *ptrptr = ptr;          *ptrptr = ptr;
6837          return FALSE;          return FALSE;
6838          }          }
# Line 6597  for (;;) Line 6877  for (;;)
6877        if (cd->open_caps->flag)        if (cd->open_caps->flag)
6878          {          {
6879          memmove(start_bracket + 1 + LINK_SIZE, start_bracket,          memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
6880            code - start_bracket);            IN_UCHARS(code - start_bracket));
6881          *start_bracket = OP_ONCE;          *start_bracket = OP_ONCE;
6882          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
6883          PUT(start_bracket, 1, (int)(code - start_bracket));          PUT(start_bracket, 1, (int)(code - start_bracket));
# Line 6699  Returns:     TRUE or FALSE Line 6979  Returns:     TRUE or FALSE
6979  */  */
6980    
6981  static BOOL  static BOOL
6982  is_anchored(register const uschar *code, unsigned int bracket_map,  is_anchored(register const pcre_uchar *code, unsigned int bracket_map,
6983    unsigned int backref_map)    unsigned int backref_map)
6984  {  {
6985  do {  do {
6986     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
6987       FALSE);       code + PRIV(OP_lengths)[*code], FALSE);
6988     register int op = *scode;     register int op = *scode;
6989    
6990     /* Non-capturing brackets */     /* Non-capturing brackets */
# Line 6727  do { Line 7007  do {
7007    
7008     /* Other brackets */     /* Other brackets */
7009    
7010     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC ||
7011                op == OP_COND)
7012       {       {
7013       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;
7014       }       }
# Line 6775  Returns:         TRUE or FALSE Line 7056  Returns:         TRUE or FALSE
7056  */  */
7057    
7058  static BOOL  static BOOL
7059  is_startline(const uschar *code, unsigned int bracket_map,  is_startline(const pcre_uchar *code, unsigned int bracket_map,
7060    unsigned int backref_map)    unsigned int backref_map)
7061  {  {
7062  do {  do {
7063     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
7064       FALSE);       code + PRIV(OP_lengths)[*code], FALSE);
7065     register int op = *scode;     register int op = *scode;
7066    
7067     /* If we are at the start of a conditional assertion group, *both* the     /* If we are at the start of a conditional assertion group, *both* the
# Line 6791  do { Line 7072  do {
7072     if (op == OP_COND)     if (op == OP_COND)
7073       {       {
7074       scode += 1 + LINK_SIZE;       scode += 1 + LINK_SIZE;
7075       if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];       if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
7076       switch (*scode)       switch (*scode)
7077         {         {
7078         case OP_CREF:         case OP_CREF:
# Line 6831  do { Line 7112  do {
7112    
7113     /* Other brackets */     /* Other brackets */
7114    
7115     else if (op == OP_ASSERT || op == OP_ONCE)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC)
7116       {       {
7117       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
7118       }       }
# Line 6878  Returns:     -1 or the fixed first char Line 7159  Returns:     -1 or the fixed first char
7159  */  */
7160    
7161  static int  static int
7162  find_firstassertedchar(const uschar *code, BOOL inassert)  find_firstassertedchar(const pcre_uchar *code, BOOL inassert)
7163  {  {
7164  register int c = -1;  register int c = -1;
7165  do {  do {
7166     int d;     int d;
7167     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
7168               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0;               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0;
7169     const uschar *scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);     const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
7170         TRUE);
7171     register int op = *scode;     register int op = *scode;
7172    
7173     switch(op)     switch(op)
# Line 6901  do { Line 7183  do {
7183       case OP_SCBRAPOS:       case OP_SCBRAPOS:
7184       case OP_ASSERT:       case OP_ASSERT:
7185       case OP_ONCE:       case OP_ONCE:
7186         case OP_ONCE_NC:
7187       case OP_COND:       case OP_COND:
7188       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)
7189         return -1;         return -1;
# Line 6964  Returns:        pointer to compiled data Line 7247  Returns:        pointer to compiled data
7247                  with errorptr and erroroffset set                  with errorptr and erroroffset set
7248  */  */
7249    
7250    #ifdef COMPILE_PCRE8
7251  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7252  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
7253    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
7254    #else
7255    PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7256    pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
7257      int *erroroffset, const unsigned char *tables)
7258    #endif
7259  {  {
7260    #ifdef COMPILE_PCRE8
7261  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7262    #else
7263    return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7264    #endif
7265  }  }
7266    
7267    
7268    #ifdef COMPILE_PCRE8
7269  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7270  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
7271    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
7272    #else
7273    PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7274    pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
7275      const char **errorptr, int *erroroffset, const unsigned char *tables)
7276    #endif
7277  {  {
7278  real_pcre *re;  real_pcre *re;
7279  int length = 1;  /* For final END opcode */  int length = 1;  /* For final END opcode */
# Line 6983  int errorcode = 0; Line 7282  int errorcode = 0;
7282  int skipatstart = 0;  int skipatstart = 0;
7283  BOOL utf8;  BOOL utf8;
7284  size_t size;  size_t size;
7285  uschar *code;  pcre_uchar *code;
7286  const uschar *codestart;  const pcre_uchar *codestart;
7287  const uschar *ptr;  const pcre_uchar *ptr;
7288  compile_data compile_block;  compile_data compile_block;
7289  compile_data *cd = &compile_block;  compile_data *cd = &compile_block;
7290    
# Line 6995  as soon as possible, so that a fairly la Line 7294  as soon as possible, so that a fairly la
7294  this purpose. The same space is used in the second phase for remembering where  this purpose. The same space is used in the second phase for remembering where
7295  to fill in forward references to subpatterns. */  to fill in forward references to subpatterns. */
7296    
7297  uschar cworkspace[COMPILE_WORK_SIZE];  pcre_uchar cworkspace[COMPILE_WORK_SIZE];
7298    
7299  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
7300    
7301  ptr = (const uschar *)pattern;  ptr = (const pcre_uchar *)pattern;
7302    
7303  /* We can't pass back an error message if errorptr is NULL; I guess the best we  /* We can't pass back an error message if errorptr is NULL; I guess the best we
7304  can do is just return NULL, but we can set a code value if there is a code  can do is just return NULL, but we can set a code value if there is a code
# Line 7026  if (erroroffset == NULL) Line 7325  if (erroroffset == NULL)
7325    
7326  /* Set up pointers to the individual character tables */  /* Set up pointers to the individual character tables */
7327    
7328  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = PRIV(default_tables);
7329  cd->lcc = tables + lcc_offset;  cd->lcc = tables + lcc_offset;
7330  cd->fcc = tables + fcc_offset;  cd->fcc = tables + fcc_offset;
7331  cd->cbits = tables + cbits_offset;  cd->cbits = tables + cbits_offset;
# Line 7049  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7348  while (ptr[skipatstart] == CHAR_LEFT_PAR
7348    int newnl = 0;    int newnl = 0;
7349    int newbsr = 0;    int newbsr = 0;
7350    
7351    if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)
7352      { skipatstart += 7; options |= PCRE_UTF8; continue; }      { skipatstart += 7; options |= PCRE_UTF8; continue; }
7353    else if (strncmp((char *)(ptr+skipatstart+2), STRING_UCP_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
7354      { skipatstart += 6; options |= PCRE_UCP; continue; }      { skipatstart += 6; options |= PCRE_UCP; continue; }
7355    else if (strncmp((char *)(ptr+skipatstart+2), STRING_NO_START_OPT_RIGHTPAR, 13) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
7356      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
7357    
7358    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
7359      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
7360    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)
7361      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
7362    else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5)  == 0)
7363      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
7364    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0)
7365      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
7366    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0)
7367      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
7368    
7369    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
7370      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
7371    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
7372      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
7373    
7374    if (newnl != 0)    if (newnl != 0)
# Line 7082  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7381  while (ptr[skipatstart] == CHAR_LEFT_PAR
7381  utf8 = (options & PCRE_UTF8) != 0;  utf8 = (options & PCRE_UTF8) != 0;
7382    
7383  /* Can't support UTF8 unless PCRE has been compiled to include the code. The  /* Can't support UTF8 unless PCRE has been compiled to include the code. The
7384  return of an error code from _pcre_valid_utf8() is a new feature, introduced in  return of an error code from PRIV(valid_utf8)() is a new feature, introduced in
7385  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
7386  not used here. */  not used here. */
7387    
7388  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
7389  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7390       (errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0)       (errorcode = PRIV(valid_utf8)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
7391    {    {
7392    errorcode = ERR44;    errorcode = ERR44;
7393    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
# Line 7170  cd->backref_map = 0; Line 7469  cd->backref_map = 0;
7469  /* Reflect pattern for debugging output */  /* Reflect pattern for debugging output */
7470    
7471  DPRINTF(("------------------------------------------------------------------\n"));  DPRINTF(("------------------------------------------------------------------\n"));
7472  DPRINTF(("%s\n", pattern));  #ifdef PCRE_DEBUG
7473    print_puchar(stdout, (PCRE_PUCHAR)pattern);
7474    #endif
7475    DPRINTF(("\n"));
7476    
7477  /* Pretend to compile the pattern while actually just accumulating the length  /* Pretend to compile the pattern while actually just accumulating the length
7478  of memory required. This behaviour is triggered by passing a non-NULL final  of memory required. This behaviour is triggered by passing a non-NULL final
# Line 7186  cd->name_table = NULL; Line 7488  cd->name_table = NULL;
7488  cd->start_workspace = cworkspace;  cd->start_workspace = cworkspace;
7489  cd->start_code = cworkspace;  cd->start_code = cworkspace;
7490  cd->hwm = cworkspace;  cd->hwm = cworkspace;
7491  cd->start_pattern = (const uschar *)pattern;  cd->start_pattern = (const pcre_uchar *)pattern;
7492  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
7493  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7494  cd->external_options = options;  cd->external_options = options;
7495  cd->external_flags = 0;  cd->external_flags = 0;
# Line 7207  code = cworkspace; Line 7509  code = cworkspace;
7509  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
7510    
7511  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
7512    cd->hwm - cworkspace));    (int)(cd->hwm - cworkspace)));
7513    
7514  if (length > MAX_PATTERN_SIZE)  if (length > MAX_PATTERN_SIZE)
7515    {    {
# Line 7220  externally provided function. Integer ov Line 7522  externally provided function. Integer ov
7522  because nowadays we limit the maximum value of cd->names_found and  because nowadays we limit the maximum value of cd->names_found and
7523  cd->name_entry_size. */  cd->name_entry_size. */
7524    
7525  size = length + sizeof(real_pcre) + cd->names_found * (cd->name_entry_size + 3);  size = sizeof(real_pcre) + (length + cd->names_found * (cd->name_entry_size + 3)) * sizeof(pcre_uchar);
7526  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
7527    
7528  if (re == NULL)  if (re == NULL)
# Line 7242  re->flags = cd->external_flags; Line 7544  re->flags = cd->external_flags;
7544  re->dummy1 = 0;  re->dummy1 = 0;
7545  re->first_byte = 0;  re->first_byte = 0;
7546  re->req_byte = 0;  re->req_byte = 0;
7547  re->name_table_offset = sizeof(real_pcre);  re->name_table_offset = sizeof(real_pcre) / sizeof(pcre_uchar);
7548  re->name_entry_size = cd->name_entry_size;  re->name_entry_size = cd->name_entry_size;
7549  re->name_count = cd->names_found;  re->name_count = cd->names_found;
7550  re->ref_count = 0;  re->ref_count = 0;
7551  re->tables = (tables == _pcre_default_tables)? NULL : tables;  re->tables = (tables == PRIV(default_tables))? NULL : tables;
7552  re->nullpad = NULL;  re->nullpad = NULL;
7553    
7554  /* The starting points of the name/number translation table and of the code are  /* The starting points of the name/number translation table and of the code are
# Line 7260  cd->final_bracount = cd->bracount;  /* S Line 7562  cd->final_bracount = cd->bracount;  /* S
7562  cd->assert_depth = 0;  cd->assert_depth = 0;
7563  cd->bracount = 0;  cd->bracount = 0;
7564  cd->names_found = 0;  cd->names_found = 0;
7565  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
7566  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
7567  cd->start_code = codestart;  cd->start_code = codestart;
7568  cd->hwm = cworkspace;  cd->hwm = cworkspace;
# Line 7273  cd->open_caps = NULL; Line 7575  cd->open_caps = NULL;
7575  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
7576  of the function here. */  of the function here. */
7577    
7578  ptr = (const uschar *)pattern + skipatstart;  ptr = (const pcre_uchar *)pattern + skipatstart;
7579  code = (uschar *)codestart;  code = (pcre_uchar *)codestart;
7580  *code = OP_BRA;  *code = OP_BRA;
7581  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
7582    &firstbyte, &reqbyte, NULL, cd, NULL);    &firstbyte, &reqbyte, NULL, cd, NULL);
# Line 7282  re->top_bracket = cd->bracount; Line 7584  re->top_bracket = cd->bracount;
7584  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
7585  re->flags = cd->external_flags;  re->flags = cd->external_flags;
7586    
7587  if (cd->had_accept) reqbyte = -1;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqbyte = REQ_NONE;   /* Must disable after (*ACCEPT) */
7588    
7589  /* If not reached end of pattern on success, there's an excess bracket. */  /* If not reached end of pattern on success, there's an excess bracket. */
7590    
# Line 7302  if (code - codestart > length) errorcode Line 7604  if (code - codestart > length) errorcode
7604  while (errorcode == 0 && cd->hwm > cworkspace)  while (errorcode == 0 && cd->hwm > cworkspace)
7605    {    {
7606    int offset, recno;    int offset, recno;
7607    const uschar *groupptr;    const pcre_uchar *groupptr;
7608    cd->hwm -= LINK_SIZE;    cd->hwm -= LINK_SIZE;
7609    offset = GET(cd->hwm, 0);    offset = GET(cd->hwm, 0);
7610    recno = GET(codestart, offset);    recno = GET(codestart, offset);
7611    groupptr = _pcre_find_bracket(codestart, utf8, recno);    groupptr = PRIV(find_bracket)(codestart, utf8, recno);
7612    if (groupptr == NULL) errorcode = ERR53;    if (groupptr == NULL) errorcode = ERR53;
7613      else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));      else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));
7614    }    }
7615    
7616  /* Give an error if there's back reference to a non-existent capturing  /* Give an error if there's back reference to a non-existent capturing
# Line 7326  length, and set their lengths. */ Line 7628  length, and set their lengths. */
7628    
7629  if (cd->check_lookbehind)  if (cd->check_lookbehind)
7630    {    {
7631    uschar *cc = (uschar *)codestart;    pcre_uchar *cc = (pcre_uchar *)codestart;
7632    
7633    /* Loop, searching for OP_REVERSE items, and process those that do not have    /* Loop, searching for OP_REVERSE items, and process those that do not have
7634    their length set. (Actually, it will also re-process any that have a length    their length set. (Actually, it will also re-process any that have a length
7635    of zero, but that is a pathological case, and it does no harm.) When we find    of zero, but that is a pathological case, and it does no harm.) When we find
7636    one, we temporarily terminate the branch it is in while we scan it. */    one, we temporarily terminate the branch it is in while we scan it. */
7637    
7638    for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);    for (cc = (pcre_uchar *)PRIV(find_bracket)(codestart, utf8, -1);
7639         cc != NULL;         cc != NULL;
7640         cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))         cc = (pcre_uchar *)PRIV(find_bracket)(cc, utf8, -1))
7641      {      {
7642      if (GET(cc, 1) == 0)      if (GET(cc, 1) == 0)
7643        {        {
7644        int fixed_length;        int fixed_length;
7645        uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);        pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
7646        int end_op = *be;        int end_op = *be;
7647        *be = OP_END;        *be = OP_END;
7648        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
# Line 7349  if (cd->check_lookbehind) Line 7651  if (cd->check_lookbehind)
7651        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
7652        if (fixed_length < 0)        if (fixed_length < 0)
7653          {          {
7654          errorcode = (fixed_length == -2)? ERR36 : ERR25;          errorcode = (fixed_length == -2)? ERR36 :
7655                        (fixed_length == -4)? ERR70 : ERR25;
7656          break;          break;
7657          }          }
7658        PUT(cc, 1, fixed_length);        PUT(cc, 1, fixed_length);
# Line 7364  if (errorcode != 0) Line 7667  if (errorcode != 0)
7667    {    {
7668    (pcre_free)(re);    (pcre_free)(re);
7669    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
7670    *erroroffset = (int)(ptr - (const uschar *)pattern);    *erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
7671    PCRE_EARLY_ERROR_RETURN2:    PCRE_EARLY_ERROR_RETURN2:
7672    *errorptr = find_error_text(errorcode);    *errorptr = find_error_text(errorcode);
7673    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
# Line 7450  if (code - codestart > length) Line 7753  if (code - codestart > length)
7753    {    {
7754    (pcre_free)(re);    (pcre_free)(re);
7755    *errorptr = find_error_text(ERR23);    *errorptr = find_error_text(ERR23);
7756    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (pcre_uchar *)pattern;
7757    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
7758    return NULL;    return NULL;
7759    }    }

Legend:
Removed from v.654  
changed lines
  Added in v.767

  ViewVC Help
Powered by ViewVC 1.1.5