/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

code/trunk/pcre_compile.c revision 716 by ph10, Tue Oct 4 16:38:05 2011 UTC code/branches/pcre16/pcre_compile.c revision 774 by zherczeg, Thu Dec 1 06:08:45 2011 UTC
# Line 97  overrun before it actually does run off Line 97  overrun before it actually does run off
97    
98  #define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100)  #define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100)
99    
100    /* Private flags added to firstchar and reqchar. */
101    
102    #define REQ_CASELESS   0x10000000l      /* Indicates caselessness */
103    #define REQ_VARY       0x20000000l      /* Reqchar followed non-literal item */
104    
105  /* Table for handling escaped characters in the range '0'-'z'. Positive returns  /* Table for handling escaped characters in the range '0'-'z'. Positive returns
106  are simple data values; negative values are for special things like \d and so  are simple data values; negative values are for special things like \d and so
# Line 231  static const char posix_names[] = Line 235  static const char posix_names[] =
235    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
236    STRING_word0  STRING_xdigit;    STRING_word0  STRING_xdigit;
237    
238  static const uschar posix_name_lengths[] = {  static const pcre_uint8 posix_name_lengths[] = {
239    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
240    
241  /* Table of class bit maps for each POSIX class. Each class is formed from a  /* Table of class bit maps for each POSIX class. Each class is formed from a
# Line 266  substitutes must be in the order of the Line 270  substitutes must be in the order of the
270  both positive and negative cases. NULL means no substitute. */  both positive and negative cases. NULL means no substitute. */
271    
272  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
273  static const uschar *substitutes[] = {  static const pcre_uchar string_PNd[]  = {
274    (uschar *)"\\P{Nd}",    /* \D */    CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
275    (uschar *)"\\p{Nd}",    /* \d */    CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
276    (uschar *)"\\P{Xsp}",   /* \S */       /* NOTE: Xsp is Perl space */  static const pcre_uchar string_pNd[]  = {
277    (uschar *)"\\p{Xsp}",   /* \s */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
278    (uschar *)"\\P{Xwd}",   /* \W */    CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
279    (uschar *)"\\p{Xwd}"    /* \w */  static const pcre_uchar string_PXsp[] = {
280      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
281      CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
282    static const pcre_uchar string_pXsp[] = {
283      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
284      CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
285    static const pcre_uchar string_PXwd[] = {
286      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
287      CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
288    static const pcre_uchar string_pXwd[] = {
289      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
290      CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
291    
292    static const pcre_uchar *substitutes[] = {
293      string_PNd,           /* \D */
294      string_pNd,           /* \d */
295      string_PXsp,          /* \S */       /* NOTE: Xsp is Perl space */
296      string_pXsp,          /* \s */
297      string_PXwd,          /* \W */
298      string_pXwd           /* \w */
299  };  };
300    
301  static const uschar *posix_substitutes[] = {  static const pcre_uchar string_pL[] =   {
302    (uschar *)"\\p{L}",     /* alpha */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
303    (uschar *)"\\p{Ll}",    /* lower */    CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
304    (uschar *)"\\p{Lu}",    /* upper */  static const pcre_uchar string_pLl[] =  {
305    (uschar *)"\\p{Xan}",   /* alnum */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
306    NULL,                   /* ascii */    CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
307    (uschar *)"\\h",        /* blank */  static const pcre_uchar string_pLu[] =  {
308    NULL,                   /* cntrl */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
309    (uschar *)"\\p{Nd}",    /* digit */    CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
310    NULL,                   /* graph */  static const pcre_uchar string_pXan[] = {
311    NULL,                   /* print */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
312    NULL,                   /* punct */    CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
313    (uschar *)"\\p{Xps}",   /* space */    /* NOTE: Xps is POSIX space */  static const pcre_uchar string_h[] =    {
314    (uschar *)"\\p{Xwd}",   /* word */    CHAR_BACKSLASH, CHAR_h, '\0' };
315    NULL,                   /* xdigit */  static const pcre_uchar string_pXps[] = {
316      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
317      CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
318    static const pcre_uchar string_PL[] =   {
319      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
320      CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
321    static const pcre_uchar string_PLl[] =  {
322      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
323      CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
324    static const pcre_uchar string_PLu[] =  {
325      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
326      CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
327    static const pcre_uchar string_PXan[] = {
328      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
329      CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
330    static const pcre_uchar string_H[] =    {
331      CHAR_BACKSLASH, CHAR_H, '\0' };
332    static const pcre_uchar string_PXps[] = {
333      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
334      CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
335    
336    static const pcre_uchar *posix_substitutes[] = {
337      string_pL,            /* alpha */
338      string_pLl,           /* lower */
339      string_pLu,           /* upper */
340      string_pXan,          /* alnum */
341      NULL,                 /* ascii */
342      string_h,             /* blank */
343      NULL,                 /* cntrl */
344      string_pNd,           /* digit */
345      NULL,                 /* graph */
346      NULL,                 /* print */
347      NULL,                 /* punct */
348      string_pXps,          /* space */    /* NOTE: Xps is POSIX space */
349      string_pXwd,          /* word */
350      NULL,                 /* xdigit */
351    /* Negated cases */    /* Negated cases */
352    (uschar *)"\\P{L}",     /* ^alpha */    string_PL,            /* ^alpha */
353    (uschar *)"\\P{Ll}",    /* ^lower */    string_PLl,           /* ^lower */
354    (uschar *)"\\P{Lu}",    /* ^upper */    string_PLu,           /* ^upper */
355    (uschar *)"\\P{Xan}",   /* ^alnum */    string_PXan,          /* ^alnum */
356    NULL,                   /* ^ascii */    NULL,                 /* ^ascii */
357    (uschar *)"\\H",        /* ^blank */    string_H,             /* ^blank */
358    NULL,                   /* ^cntrl */    NULL,                 /* ^cntrl */
359    (uschar *)"\\P{Nd}",    /* ^digit */    string_PNd,           /* ^digit */
360    NULL,                   /* ^graph */    NULL,                 /* ^graph */
361    NULL,                   /* ^print */    NULL,                 /* ^print */
362    NULL,                   /* ^punct */    NULL,                 /* ^punct */
363    (uschar *)"\\P{Xps}",   /* ^space */   /* NOTE: Xps is POSIX space */    string_PXps,          /* ^space */   /* NOTE: Xps is POSIX space */
364    (uschar *)"\\P{Xwd}",   /* ^word */    string_PXwd,          /* ^word */
365    NULL                    /* ^xdigit */    NULL                  /* ^xdigit */
366  };  };
367  #define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))  #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
368  #endif  #endif
369    
370  #define STRING(a)  # a  #define STRING(a)  # a
# Line 410  static const char error_texts[] = Line 468  static const char error_texts[] =
468    "this version of PCRE is not compiled with PCRE_UCP support\0"    "this version of PCRE is not compiled with PCRE_UCP support\0"
469    "\\c must be followed by an ASCII character\0"    "\\c must be followed by an ASCII character\0"
470    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
471      /* 70 */
472      "internal error: unknown opcode in find_fixedlength()\0"
473    ;    ;
474    
475  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 428  For convenience, we use the same bit def Line 488  For convenience, we use the same bit def
488    
489  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
490    
491    /* Using a simple comparison for decimal numbers rather than a memory read
492    is much faster, and the resulting code is simpler (the compiler turns it
493    into a subtraction and unsigned comparison). */
494    
495    #define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
496    
497  #ifndef EBCDIC  #ifndef EBCDIC
498    
499  /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in  /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
500  UTF-8 mode. */  UTF-8 mode. */
501    
502  static const unsigned char digitab[] =  static const pcre_uint8 digitab[] =
503    {    {
504    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
505    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15 */
# Line 472  static const unsigned char digitab[] = Line 538  static const unsigned char digitab[] =
538    
539  /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */  /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
540    
541  static const unsigned char digitab[] =  static const pcre_uint8 digitab[] =
542    {    {
543    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
544    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15    */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15    */
# Line 507  static const unsigned char digitab[] = Line 573  static const unsigned char digitab[] =
573    0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  F0 */    0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  F0 */
574    0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255    */    0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255    */
575    
576  static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */  static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
577    0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*   0-  7 */    0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*   0-  7 */
578    0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */    0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
579    0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*  16- 23 */    0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*  16- 23 */
# Line 546  static const unsigned char ebcdic_charta Line 612  static const unsigned char ebcdic_charta
612  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
613    
614  static BOOL  static BOOL
615    compile_regex(int, uschar **, const uschar **, int *, BOOL, BOOL, int, int,    compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
616      int *, int *, branch_chain *, compile_data *, int *);      int *, int *, branch_chain *, compile_data *, int *);
617    
618    
# Line 593  Returns:    TRUE or FALSE Line 659  Returns:    TRUE or FALSE
659  */  */
660    
661  static BOOL  static BOOL
662  is_counted_repeat(const uschar *p)  is_counted_repeat(const pcre_uchar *p)
663  {  {
664  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if (!IS_DIGIT(*p)) return FALSE;
665  while ((digitab[*p] & ctype_digit) != 0) p++;  p++;
666    while (IS_DIGIT(*p)) p++;
667  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
668    
669  if (*p++ != CHAR_COMMA) return FALSE;  if (*p++ != CHAR_COMMA) return FALSE;
670  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
671    
672  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if (!IS_DIGIT(*p)) return FALSE;
673  while ((digitab[*p] & ctype_digit) != 0) p++;  p++;
674    while (IS_DIGIT(*p)) p++;
675    
676  return (*p == CHAR_RIGHT_CURLY_BRACKET);  return (*p == CHAR_RIGHT_CURLY_BRACKET);
677  }  }
# Line 635  Returns:         zero or positive => a d Line 703  Returns:         zero or positive => a d
703  */  */
704    
705  static int  static int
706  check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,  check_escape(const pcre_uchar **ptrptr, int *errorcodeptr, int bracount,
707    int options, BOOL isclass)    int options, BOOL isclass)
708  {  {
709  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
710  const uschar *ptr = *ptrptr + 1;  const pcre_uchar *ptr = *ptrptr + 1;
711  int c, i;  int c, i;
712    
713  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */
# Line 654  in a table. A non-zero result is somethi Line 722  in a table. A non-zero result is somethi
722  Otherwise further processing may be required. */  Otherwise further processing may be required. */
723    
724  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
725  else if (c < CHAR_0 || c > CHAR_z) {}                     /* Not alphanumeric */  /* Not alphanumeric */
726    else if (c < CHAR_0 || c > CHAR_z) {}
727  else if ((i = escapes[c - CHAR_0]) != 0) c = i;  else if ((i = escapes[c - CHAR_0]) != 0) c = i;
728    
729  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
730  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */  /* Not alphanumeric */
731    else if (c < 'a' || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
732  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  c = i;
733  #endif  #endif
734    
# Line 666  else if ((i = escapes[c - 0x48]) != 0) Line 736  else if ((i = escapes[c - 0x48]) != 0)
736    
737  else  else
738    {    {
739    const uschar *oldptr;    const pcre_uchar *oldptr;
740    BOOL braced, negated;    BOOL braced, negated;
741    
742    switch (c)    switch (c)
# Line 676  else Line 746  else
746    
747      case CHAR_l:      case CHAR_l:
748      case CHAR_L:      case CHAR_L:
749        *errorcodeptr = ERR37;
750        break;
751    
752      case CHAR_u:      case CHAR_u:
753        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
754          {
755          /* In JavaScript, \u must be followed by four hexadecimal numbers.
756          Otherwise it is a lowercase u letter. */
757          if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
758            && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
759            && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
760            && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
761            {
762            c = 0;
763            for (i = 0; i < 4; ++i)
764              {
765              register int cc = *(++ptr);
766    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
767              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
768              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
769    #else           /* EBCDIC coding */
770              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
771              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
772    #endif
773              }
774            }
775          }
776        else
777          *errorcodeptr = ERR37;
778        break;
779    
780      case CHAR_U:      case CHAR_U:
781      *errorcodeptr = ERR37;      /* In JavaScript, \U is an uppercase U letter. */
782        if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
783      break;      break;
784    
785      /* In a character class, \g is just a literal "g". Outside a character      /* In a character class, \g is just a literal "g". Outside a character
# Line 710  else Line 811  else
811    
812      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
813        {        {
814        const uschar *p;        const pcre_uchar *p;
815        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
816          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
817        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
818          {          {
819          c = -ESC_k;          c = -ESC_k;
# Line 730  else Line 831  else
831        }        }
832      else negated = FALSE;      else negated = FALSE;
833    
834        /* The integer range is limited by the machine's int representation. */
835      c = 0;      c = 0;
836      while ((digitab[ptr[1]] & ctype_digit) != 0)      while (IS_DIGIT(ptr[1]))
837          {
838          if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */
839            {
840            c = -1;
841            break;
842            }
843        c = c * 10 + *(++ptr) - CHAR_0;        c = c * 10 + *(++ptr) - CHAR_0;
844          }
845      if (c < 0)   /* Integer overflow */      if (((unsigned int)c) > INT_MAX) /* Integer overflow */
846        {        {
847          while (IS_DIGIT(ptr[1]))
848            ptr++;
849        *errorcodeptr = ERR61;        *errorcodeptr = ERR61;
850        break;        break;
851        }        }
# Line 783  else Line 893  else
893      if (!isclass)      if (!isclass)
894        {        {
895        oldptr = ptr;        oldptr = ptr;
896          /* The integer range is limited by the machine's int representation. */
897        c -= CHAR_0;        c -= CHAR_0;
898        while ((digitab[ptr[1]] & ctype_digit) != 0)        while (IS_DIGIT(ptr[1]))
899            {
900            if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */
901              {
902              c = -1;
903              break;
904              }
905          c = c * 10 + *(++ptr) - CHAR_0;          c = c * 10 + *(++ptr) - CHAR_0;
906        if (c < 0)    /* Integer overflow */          }
907          if (((unsigned int)c) > INT_MAX) /* Integer overflow */
908          {          {
909            while (IS_DIGIT(ptr[1]))
910              ptr++;
911          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
912          break;          break;
913          }          }
# Line 820  else Line 940  else
940      c -= CHAR_0;      c -= CHAR_0;
941      while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)      while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
942          c = c * 8 + *(++ptr) - CHAR_0;          c = c * 8 + *(++ptr) - CHAR_0;
943      if (!utf8 && c > 255) *errorcodeptr = ERR51;      if (!utf8 && c > 0xff) *errorcodeptr = ERR51;
944      break;      break;
945    
946      /* \x is complicated. \x{ddd} is a character number which can be greater      /* \x is complicated. \x{ddd} is a character number which can be greater
# Line 828  else Line 948  else
948      treated as a data character. */      treated as a data character. */
949    
950      case CHAR_x:      case CHAR_x:
951        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
952          {
953          /* In JavaScript, \x must be followed by two hexadecimal digits.
954          Otherwise it is a lowercase x letter. */
955          if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
956            && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
957            {
958            c = 0;
959            for (i = 0; i < 2; ++i)
960              {
961              register int cc = *(++ptr);
962    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
963              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
964              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
965    #else           /* EBCDIC coding */
966              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
967              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
968    #endif
969              }
970            }
971          break;
972          }
973    
974      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
975        {        {
976        const uschar *pt = ptr + 2;        const pcre_uchar *pt = ptr + 2;
977        int count = 0;        int count = 0;
978    
979        c = 0;        c = 0;
980        while ((digitab[*pt] & ctype_xdigit) != 0)        while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
981          {          {
982          register int cc = *pt++;          register int cc = *pt++;
983          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
# Line 851  else Line 994  else
994    
995        if (*pt == CHAR_RIGHT_CURLY_BRACKET)        if (*pt == CHAR_RIGHT_CURLY_BRACKET)
996          {          {
997          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;  #ifdef COMPILE_PCRE8
998            if (c < 0 || count > (utf8? 8:2)) *errorcodeptr = ERR34;
999    #else
1000    #ifdef COMPILE_PCRE16
1001            if (c < 0 || count > (utf8? 8:4)) *errorcodeptr = ERR34;
1002    #endif
1003    #endif
1004          ptr = pt;          ptr = pt;
1005          break;          break;
1006          }          }
# Line 863  else Line 1012  else
1012      /* Read just a single-byte hex-defined char */      /* Read just a single-byte hex-defined char */
1013    
1014      c = 0;      c = 0;
1015      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
1016        {        {
1017        int cc;                                  /* Some compilers don't like */        int cc;                                  /* Some compilers don't like */
1018        cc = *(++ptr);                           /* ++ in initializers */        cc = *(++ptr);                           /* ++ in initializers */
# Line 961  Returns:         type value from ucp_typ Line 1110  Returns:         type value from ucp_typ
1110  */  */
1111    
1112  static int  static int
1113  get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
1114  {  {
1115  int c, i, bot, top;  int c, i, bot, top;
1116  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
1117  char name[32];  pcre_uchar name[32];
1118    
1119  c = *(++ptr);  c = *(++ptr);
1120  if (c == 0) goto ERROR_RETURN;  if (c == 0) goto ERROR_RETURN;
# Line 1006  else Line 1155  else
1155  /* Search for a recognized property name using binary chop */  /* Search for a recognized property name using binary chop */
1156    
1157  bot = 0;  bot = 0;
1158  top = _pcre_utt_size;  top = PRIV(utt_size);
1159    
1160  while (bot < top)  while (bot < top)
1161    {    {
1162    i = (bot + top) >> 1;    i = (bot + top) >> 1;
1163    c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);    c = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
1164    if (c == 0)    if (c == 0)
1165      {      {
1166      *dptr = _pcre_utt[i].value;      *dptr = PRIV(utt)[i].value;
1167      return _pcre_utt[i].type;      return PRIV(utt)[i].type;
1168      }      }
1169    if (c > 0) bot = i + 1; else top = i;    if (c > 0) bot = i + 1; else top = i;
1170    }    }
# Line 1053  Returns:         pointer to '}' on succe Line 1202  Returns:         pointer to '}' on succe
1202                   current ptr on error, with errorcodeptr set non-zero                   current ptr on error, with errorcodeptr set non-zero
1203  */  */
1204    
1205  static const uschar *  static const pcre_uchar *
1206  read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr)  read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr)
1207  {  {
1208  int min = 0;  int min = 0;
1209  int max = -1;  int max = -1;
# Line 1062  int max = -1; Line 1211  int max = -1;
1211  /* Read the minimum value and do a paranoid check: a negative value indicates  /* Read the minimum value and do a paranoid check: a negative value indicates
1212  an integer overflow. */  an integer overflow. */
1213    
1214  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;  while (IS_DIGIT(*p)) min = min * 10 + *p++ - CHAR_0;
1215  if (min < 0 || min > 65535)  if (min < 0 || min > 65535)
1216    {    {
1217    *errorcodeptr = ERR5;    *errorcodeptr = ERR5;
# Line 1077  if (*p == CHAR_RIGHT_CURLY_BRACKET) max Line 1226  if (*p == CHAR_RIGHT_CURLY_BRACKET) max
1226    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
1227      {      {
1228      max = 0;      max = 0;
1229      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;      while(IS_DIGIT(*p)) max = max * 10 + *p++ - CHAR_0;
1230      if (max < 0 || max > 65535)      if (max < 0 || max > 65535)
1231        {        {
1232        *errorcodeptr = ERR5;        *errorcodeptr = ERR5;
# Line 1139  Returns:       the number of the named s Line 1288  Returns:       the number of the named s
1288  */  */
1289    
1290  static int  static int
1291  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn,
1292    BOOL xmode, BOOL utf8, int *count)    BOOL xmode, BOOL utf8, int *count)
1293  {  {
1294  uschar *ptr = *ptrptr;  pcre_uchar *ptr = *ptrptr;
1295  int start_count = *count;  int start_count = *count;
1296  int hwm_count = start_count;  int hwm_count = start_count;
1297  BOOL dup_parens = FALSE;  BOOL dup_parens = FALSE;
# Line 1209  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1358  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1358          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1359        {        {
1360        int term;        int term;
1361        const uschar *thisname;        const pcre_uchar *thisname;
1362        *count += 1;        *count += 1;
1363        if (name == NULL && *count == lorn) return *count;        if (name == NULL && *count == lorn) return *count;
1364        term = *ptr++;        term = *ptr++;
# Line 1217  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1366  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1366        thisname = ptr;        thisname = ptr;
1367        while (*ptr != term) ptr++;        while (*ptr != term) ptr++;
1368        if (name != NULL && lorn == ptr - thisname &&        if (name != NULL && lorn == ptr - thisname &&
1369            strncmp((const char *)name, (const char *)thisname, lorn) == 0)            STRNCMP_UC_UC(name, thisname, lorn) == 0)
1370          return *count;          return *count;
1371        term++;        term++;
1372        }        }
# Line 1260  for (; ptr < cd->end_pattern; ptr++) Line 1409  for (; ptr < cd->end_pattern; ptr++)
1409          {          {
1410          if (ptr[2] == CHAR_E)          if (ptr[2] == CHAR_E)
1411            ptr+= 2;            ptr+= 2;
1412          else if (strncmp((const char *)ptr+2,          else if (STRNCMP_UC_C8(ptr + 2,
1413                   STR_Q STR_BACKSLASH STR_E, 3) == 0)                   STR_Q STR_BACKSLASH STR_E, 3) == 0)
1414            ptr += 4;            ptr += 4;
1415          else          else
# Line 1372  Returns:       the number of the found s Line 1521  Returns:       the number of the found s
1521  */  */
1522    
1523  static int  static int
1524  find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode,  find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode,
1525    BOOL utf8)    BOOL utf8)
1526  {  {
1527  uschar *ptr = (uschar *)cd->start_pattern;  pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern;
1528  int count = 0;  int count = 0;
1529  int rc;  int rc;
1530    
# Line 1413  Arguments: Line 1562  Arguments:
1562  Returns:       pointer to the first significant opcode  Returns:       pointer to the first significant opcode
1563  */  */
1564    
1565  static const uschar*  static const pcre_uchar*
1566  first_significant_code(const uschar *code, BOOL skipassert)  first_significant_code(const pcre_uchar *code, BOOL skipassert)
1567  {  {
1568  for (;;)  for (;;)
1569    {    {
# Line 1425  for (;;) Line 1574  for (;;)
1574      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1575      if (!skipassert) return code;      if (!skipassert) return code;
1576      do code += GET(code, 1); while (*code == OP_ALT);      do code += GET(code, 1); while (*code == OP_ALT);
1577      code += _pcre_OP_lengths[*code];      code += PRIV(OP_lengths)[*code];
1578      break;      break;
1579    
1580      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
# Line 1439  for (;;) Line 1588  for (;;)
1588      case OP_RREF:      case OP_RREF:
1589      case OP_NRREF:      case OP_NRREF:
1590      case OP_DEF:      case OP_DEF:
1591      code += _pcre_OP_lengths[*code];      code += PRIV(OP_lengths)[*code];
1592      break;      break;
1593    
1594      default:      default:
# Line 1475  Arguments: Line 1624  Arguments:
1624    
1625  Returns:   the fixed length,  Returns:   the fixed length,
1626               or -1 if there is no fixed length,               or -1 if there is no fixed length,
1627               or -2 if \C was encountered               or -2 if \C was encountered (in UTF-8 mode only)
1628               or -3 if an OP_RECURSE item was encountered and atend is FALSE               or -3 if an OP_RECURSE item was encountered and atend is FALSE
1629                 or -4 if an unknown opcode was encountered (internal error)
1630  */  */
1631    
1632  static int  static int
1633  find_fixedlength(uschar *code, BOOL utf8, BOOL atend, compile_data *cd)  find_fixedlength(pcre_uchar *code, BOOL utf8, BOOL atend, compile_data *cd)
1634  {  {
1635  int length = -1;  int length = -1;
1636    
1637  register int branchlength = 0;  register int branchlength = 0;
1638  register uschar *cc = code + 1 + LINK_SIZE;  register pcre_uchar *cc = code + 1 + LINK_SIZE;
1639    
1640  /* Scan along the opcodes for this branch. If we get to the end of the  /* Scan along the opcodes for this branch. If we get to the end of the
1641  branch, check the length against that of the other branches. */  branch, check the length against that of the other branches. */
# Line 1493  branch, check the length against that of Line 1643  branch, check the length against that of
1643  for (;;)  for (;;)
1644    {    {
1645    int d;    int d;
1646    uschar *ce, *cs;    pcre_uchar *ce, *cs;
1647    register int op = *cc;    register int op = *cc;
1648    switch (op)    switch (op)
1649      {      {
1650      /* We only need to continue for OP_CBRA (normal capturing bracket) and      /* We only need to continue for OP_CBRA (normal capturing bracket) and
1651      OP_BRA (normal non-capturing bracket) because the other variants of these      OP_BRA (normal non-capturing bracket) because the other variants of these
1652      opcodes are all concerned with unlimited repeated groups, which of course      opcodes are all concerned with unlimited repeated groups, which of course
1653      are not of fixed length. They will cause a -1 response from the default      are not of fixed length. */
     case of this switch. */  
1654    
1655      case OP_CBRA:      case OP_CBRA:
1656      case OP_BRA:      case OP_BRA:
1657      case OP_ONCE:      case OP_ONCE:
1658        case OP_ONCE_NC:
1659      case OP_COND:      case OP_COND:
1660      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf8, atend, cd);
1661      if (d < 0) return d;      if (d < 0) return d;
1662      branchlength += d;      branchlength += d;
1663      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
1664      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1665      break;      break;
1666    
1667      /* Reached end of a branch; if it's a ket it is the end of a nested      /* Reached end of a branch; if it's a ket it is the end of a nested call.
1668      call. If it's ALT it is an alternation in a nested call. If it is      If it's ALT it is an alternation in a nested call. An ACCEPT is effectively
1669      END it's the end of the outer call. All can be handled by the same code.      an ALT. If it is END it's the end of the outer call. All can be handled by
1670      Note that we must not include the OP_KETRxxx opcodes here, because they      the same code. Note that we must not include the OP_KETRxxx opcodes here,
1671      all imply an unlimited repeat. */      because they all imply an unlimited repeat. */
1672    
1673      case OP_ALT:      case OP_ALT:
1674      case OP_KET:      case OP_KET:
1675      case OP_END:      case OP_END:
1676        case OP_ACCEPT:
1677        case OP_ASSERT_ACCEPT:
1678      if (length < 0) length = branchlength;      if (length < 0) length = branchlength;
1679        else if (length != branchlength) return -1;        else if (length != branchlength) return -1;
1680      if (*cc != OP_ALT) return length;      if (*cc != OP_ALT) return length;
# Line 1536  for (;;) Line 1688  for (;;)
1688    
1689      case OP_RECURSE:      case OP_RECURSE:
1690      if (!atend) return -3;      if (!atend) return -3;
1691      cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1692      do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
1693      if (cc > cs && cc < ce) return -1;                /* Recursion */      if (cc > cs && cc < ce) return -1;                    /* Recursion */
1694      d = find_fixedlength(cs + 2, utf8, atend, cd);      d = find_fixedlength(cs + 2, utf8, atend, cd);
1695      if (d < 0) return d;      if (d < 0) return d;
1696      branchlength += d;      branchlength += d;
# Line 1556  for (;;) Line 1708  for (;;)
1708    
1709      /* Skip over things that don't match chars */      /* Skip over things that don't match chars */
1710    
1711      case OP_REVERSE:      case OP_MARK:
1712      case OP_CREF:      case OP_PRUNE_ARG:
1713      case OP_NCREF:      case OP_SKIP_ARG:
1714      case OP_RREF:      case OP_THEN_ARG:
1715      case OP_NRREF:      cc += cc[1] + PRIV(OP_lengths)[*cc];
1716      case OP_DEF:      break;
1717    
1718      case OP_CALLOUT:      case OP_CALLOUT:
     case OP_SOD:  
     case OP_SOM:  
     case OP_SET_SOM:  
     case OP_EOD:  
     case OP_EODN:  
1719      case OP_CIRC:      case OP_CIRC:
1720      case OP_CIRCM:      case OP_CIRCM:
1721        case OP_CLOSE:
1722        case OP_COMMIT:
1723        case OP_CREF:
1724        case OP_DEF:
1725      case OP_DOLL:      case OP_DOLL:
1726      case OP_DOLLM:      case OP_DOLLM:
1727        case OP_EOD:
1728        case OP_EODN:
1729        case OP_FAIL:
1730        case OP_NCREF:
1731        case OP_NRREF:
1732      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
1733        case OP_PRUNE:
1734        case OP_REVERSE:
1735        case OP_RREF:
1736        case OP_SET_SOM:
1737        case OP_SKIP:
1738        case OP_SOD:
1739        case OP_SOM:
1740        case OP_THEN:
1741      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
1742      cc += _pcre_OP_lengths[*cc];      cc += PRIV(OP_lengths)[*cc];
1743      break;      break;
1744    
1745      /* Handle literal characters */      /* Handle literal characters */
# Line 1586  for (;;) Line 1751  for (;;)
1751      branchlength++;      branchlength++;
1752      cc += 2;      cc += 2;
1753  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1754      if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
1755  #endif  #endif
1756      break;      break;
1757    
# Line 1594  for (;;) Line 1759  for (;;)
1759      need to skip over a multibyte character in UTF8 mode.  */      need to skip over a multibyte character in UTF8 mode.  */
1760    
1761      case OP_EXACT:      case OP_EXACT:
1762        case OP_EXACTI:
1763        case OP_NOTEXACT:
1764        case OP_NOTEXACTI:
1765      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1766      cc += 4;      cc += 2 + IMM2_SIZE;
1767  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1768      if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
1769  #endif  #endif
1770      break;      break;
1771    
1772      case OP_TYPEEXACT:      case OP_TYPEEXACT:
1773      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1774      if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;      if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
1775      cc += 4;      cc += 1 + IMM2_SIZE + 1;
1776      break;      break;
1777    
1778      /* Handle single-char matchers */      /* Handle single-char matchers */
# Line 1614  for (;;) Line 1782  for (;;)
1782      cc += 2;      cc += 2;
1783      /* Fall through */      /* Fall through */
1784    
1785        case OP_HSPACE:
1786        case OP_VSPACE:
1787        case OP_NOT_HSPACE:
1788        case OP_NOT_VSPACE:
1789      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1790      case OP_DIGIT:      case OP_DIGIT:
1791      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
# Line 1626  for (;;) Line 1798  for (;;)
1798      cc++;      cc++;
1799      break;      break;
1800    
1801      /* The single-byte matcher isn't allowed */      /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
1802        otherwise \C is coded as OP_ALLANY. */
1803    
1804      case OP_ANYBYTE:      case OP_ANYBYTE:
1805      return -2;      return -2;
1806    
1807      /* Check a class for variable quantification */      /* Check a class for variable quantification */
1808    
1809  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1810      case OP_XCLASS:      case OP_XCLASS:
1811      cc += GET(cc, 1) - 33;      cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
1812      /* Fall through */      /* Fall through */
1813  #endif  #endif
1814    
1815      case OP_CLASS:      case OP_CLASS:
1816      case OP_NCLASS:      case OP_NCLASS:
1817      cc += 33;      cc += PRIV(OP_lengths)[OP_CLASS];
1818    
1819      switch (*cc)      switch (*cc)
1820        {        {
1821          case OP_CRPLUS:
1822          case OP_CRMINPLUS:
1823        case OP_CRSTAR:        case OP_CRSTAR:
1824        case OP_CRMINSTAR:        case OP_CRMINSTAR:
1825        case OP_CRQUERY:        case OP_CRQUERY:
# Line 1653  for (;;) Line 1828  for (;;)
1828    
1829        case OP_CRRANGE:        case OP_CRRANGE:
1830        case OP_CRMINRANGE:        case OP_CRMINRANGE:
1831        if (GET2(cc,1) != GET2(cc,3)) return -1;        if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;
1832        branchlength += GET2(cc,1);        branchlength += GET2(cc,1);
1833        cc += 5;        cc += 1 + 2 * IMM2_SIZE;
1834        break;        break;
1835    
1836        default:        default:
# Line 1665  for (;;) Line 1840  for (;;)
1840    
1841      /* Anything else is variable length */      /* Anything else is variable length */
1842    
1843      default:      case OP_ANYNL:
1844        case OP_BRAMINZERO:
1845        case OP_BRAPOS:
1846        case OP_BRAPOSZERO:
1847        case OP_BRAZERO:
1848        case OP_CBRAPOS:
1849        case OP_EXTUNI:
1850        case OP_KETRMAX:
1851        case OP_KETRMIN:
1852        case OP_KETRPOS:
1853        case OP_MINPLUS:
1854        case OP_MINPLUSI:
1855        case OP_MINQUERY:
1856        case OP_MINQUERYI:
1857        case OP_MINSTAR:
1858        case OP_MINSTARI:
1859        case OP_MINUPTO:
1860        case OP_MINUPTOI:
1861        case OP_NOTMINPLUS:
1862        case OP_NOTMINPLUSI:
1863        case OP_NOTMINQUERY:
1864        case OP_NOTMINQUERYI:
1865        case OP_NOTMINSTAR:
1866        case OP_NOTMINSTARI:
1867        case OP_NOTMINUPTO:
1868        case OP_NOTMINUPTOI:
1869        case OP_NOTPLUS:
1870        case OP_NOTPLUSI:
1871        case OP_NOTPOSPLUS:
1872        case OP_NOTPOSPLUSI:
1873        case OP_NOTPOSQUERY:
1874        case OP_NOTPOSQUERYI:
1875        case OP_NOTPOSSTAR:
1876        case OP_NOTPOSSTARI:
1877        case OP_NOTPOSUPTO:
1878        case OP_NOTPOSUPTOI:
1879        case OP_NOTQUERY:
1880        case OP_NOTQUERYI:
1881        case OP_NOTSTAR:
1882        case OP_NOTSTARI:
1883        case OP_NOTUPTO:
1884        case OP_NOTUPTOI:
1885        case OP_PLUS:
1886        case OP_PLUSI:
1887        case OP_POSPLUS:
1888        case OP_POSPLUSI:
1889        case OP_POSQUERY:
1890        case OP_POSQUERYI:
1891        case OP_POSSTAR:
1892        case OP_POSSTARI:
1893        case OP_POSUPTO:
1894        case OP_POSUPTOI:
1895        case OP_QUERY:
1896        case OP_QUERYI:
1897        case OP_REF:
1898        case OP_REFI:
1899        case OP_SBRA:
1900        case OP_SBRAPOS:
1901        case OP_SCBRA:
1902        case OP_SCBRAPOS:
1903        case OP_SCOND:
1904        case OP_SKIPZERO:
1905        case OP_STAR:
1906        case OP_STARI:
1907        case OP_TYPEMINPLUS:
1908        case OP_TYPEMINQUERY:
1909        case OP_TYPEMINSTAR:
1910        case OP_TYPEMINUPTO:
1911        case OP_TYPEPLUS:
1912        case OP_TYPEPOSPLUS:
1913        case OP_TYPEPOSQUERY:
1914        case OP_TYPEPOSSTAR:
1915        case OP_TYPEPOSUPTO:
1916        case OP_TYPEQUERY:
1917        case OP_TYPESTAR:
1918        case OP_TYPEUPTO:
1919        case OP_UPTO:
1920        case OP_UPTOI:
1921      return -1;      return -1;
1922    
1923        /* Catch unrecognized opcodes so that when new ones are added they
1924        are not forgotten, as has happened in the past. */
1925    
1926        default:
1927        return -4;
1928      }      }
1929    }    }
1930  /* Control never gets here */  /* Control never gets here */
# Line 1693  Arguments: Line 1951  Arguments:
1951  Returns:      pointer to the opcode for the bracket, or NULL if not found  Returns:      pointer to the opcode for the bracket, or NULL if not found
1952  */  */
1953    
1954  const uschar *  const pcre_uchar *
1955  _pcre_find_bracket(const uschar *code, BOOL utf8, int number)  PRIV(find_bracket)(const pcre_uchar *code, BOOL utf8, int number)
1956  {  {
1957  for (;;)  for (;;)
1958    {    {
# Line 1712  for (;;) Line 1970  for (;;)
1970    
1971    else if (c == OP_REVERSE)    else if (c == OP_REVERSE)
1972      {      {
1973      if (number < 0) return (uschar *)code;      if (number < 0) return (pcre_uchar *)code;
1974      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
1975      }      }
1976    
1977    /* Handle capturing bracket */    /* Handle capturing bracket */
# Line 1722  for (;;) Line 1980  for (;;)
1980             c == OP_CBRAPOS || c == OP_SCBRAPOS)             c == OP_CBRAPOS || c == OP_SCBRAPOS)
1981      {      {
1982      int n = GET2(code, 1+LINK_SIZE);      int n = GET2(code, 1+LINK_SIZE);
1983      if (n == number) return (uschar *)code;      if (n == number) return (pcre_uchar *)code;
1984      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
1985      }      }
1986    
1987    /* Otherwise, we can get the item's length from the table, except that for    /* Otherwise, we can get the item's length from the table, except that for
# Line 1751  for (;;) Line 2009  for (;;)
2009        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
2010        case OP_TYPEEXACT:        case OP_TYPEEXACT:
2011        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
2012        if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;        if (code[1 + IMM2_SIZE] == OP_PROP
2013            || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;
2014        break;        break;
2015    
2016        case OP_MARK:        case OP_MARK:
# Line 1767  for (;;) Line 2026  for (;;)
2026    
2027      /* Add in the fixed length from the table */      /* Add in the fixed length from the table */
2028    
2029      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
2030    
2031    /* In UTF-8 mode, opcodes that are followed by a character may be followed by    /* In UTF-8 mode, opcodes that are followed by a character may be followed by
2032    a multi-byte character. The length in the table is a minimum, so we have to    a multi-byte character. The length in the table is a minimum, so we have to
# Line 1804  for (;;) Line 2063  for (;;)
2063        case OP_MINQUERYI:        case OP_MINQUERYI:
2064        case OP_POSQUERY:        case OP_POSQUERY:
2065        case OP_POSQUERYI:        case OP_POSQUERYI:
2066        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += PRIV(utf8_table4)[code[-1] & 0x3f];
2067        break;        break;
2068        }        }
2069  #else  #else
# Line 1830  Arguments: Line 2089  Arguments:
2089  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found
2090  */  */
2091    
2092  static const uschar *  static const pcre_uchar *
2093  find_recurse(const uschar *code, BOOL utf8)  find_recurse(const pcre_uchar *code, BOOL utf8)
2094  {  {
2095  for (;;)  for (;;)
2096    {    {
# Line 1870  for (;;) Line 2129  for (;;)
2129        case OP_TYPEUPTO:        case OP_TYPEUPTO:
2130        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
2131        case OP_TYPEEXACT:        case OP_TYPEEXACT:
2132        if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;        if (code[1 + IMM2_SIZE] == OP_PROP
2133            || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;
2134        break;        break;
2135    
2136        case OP_MARK:        case OP_MARK:
# Line 1886  for (;;) Line 2146  for (;;)
2146    
2147      /* Add in the fixed length from the table */      /* Add in the fixed length from the table */
2148    
2149      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
2150    
2151      /* In UTF-8 mode, opcodes that are followed by a character may be followed      /* In UTF-8 mode, opcodes that are followed by a character may be followed
2152      by a multi-byte character. The length in the table is a minimum, so we have      by a multi-byte character. The length in the table is a minimum, so we have
# Line 1923  for (;;) Line 2183  for (;;)
2183        case OP_MINQUERYI:        case OP_MINQUERYI:
2184        case OP_POSQUERY:        case OP_POSQUERY:
2185        case OP_POSQUERYI:        case OP_POSQUERYI:
2186        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += PRIV(utf8_table4)[code[-1] & 0x3f];
2187        break;        break;
2188        }        }
2189  #else  #else
# Line 1957  Returns:      TRUE if what is matched co Line 2217  Returns:      TRUE if what is matched co
2217  */  */
2218    
2219  static BOOL  static BOOL
2220  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2221    compile_data *cd)    BOOL utf8, compile_data *cd)
2222  {  {
2223  register int c;  register int c;
2224  for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
2225       code < endcode;       code < endcode;
2226       code = first_significant_code(code + _pcre_OP_lengths[c], TRUE))       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
2227    {    {
2228    const uschar *ccode;    const pcre_uchar *ccode;
2229    
2230    c = *code;    c = *code;
2231    
# Line 1988  for (code = first_significant_code(code Line 2248  for (code = first_significant_code(code
2248    
2249    if (c == OP_RECURSE)    if (c == OP_RECURSE)
2250      {      {
2251      const uschar *scode;      const pcre_uchar *scode;
2252      BOOL empty_branch;      BOOL empty_branch;
2253    
2254      /* Test for forward reference */      /* Test for forward reference */
# Line 2024  for (code = first_significant_code(code Line 2284  for (code = first_significant_code(code
2284    if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||    if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||
2285        c == OP_BRAPOSZERO)        c == OP_BRAPOSZERO)
2286      {      {
2287      code += _pcre_OP_lengths[c];      code += PRIV(OP_lengths)[c];
2288      do code += GET(code, 1); while (*code == OP_ALT);      do code += GET(code, 1); while (*code == OP_ALT);
2289      c = *code;      c = *code;
2290      continue;      continue;
# Line 2045  for (code = first_significant_code(code Line 2305  for (code = first_significant_code(code
2305    
2306    if (c == OP_BRA  || c == OP_BRAPOS ||    if (c == OP_BRA  || c == OP_BRAPOS ||
2307        c == OP_CBRA || c == OP_CBRAPOS ||        c == OP_CBRA || c == OP_CBRAPOS ||
2308        c == OP_ONCE || c == OP_COND)        c == OP_ONCE || c == OP_ONCE_NC ||
2309          c == OP_COND)
2310      {      {
2311      BOOL empty_branch;      BOOL empty_branch;
2312      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
# Line 2079  for (code = first_significant_code(code Line 2340  for (code = first_significant_code(code
2340      {      {
2341      /* Check for quantifiers after a class. XCLASS is used for classes that      /* Check for quantifiers after a class. XCLASS is used for classes that
2342      cannot be represented just by a bit map. This includes negated single      cannot be represented just by a bit map. This includes negated single
2343      high-valued characters. The length in _pcre_OP_lengths[] is zero; the      high-valued characters. The length in PRIV(OP_lengths)[] is zero; the
2344      actual length is stored in the compiled code, so we must update "code"      actual length is stored in the compiled code, so we must update "code"
2345      here. */      here. */
2346    
# Line 2091  for (code = first_significant_code(code Line 2352  for (code = first_significant_code(code
2352    
2353      case OP_CLASS:      case OP_CLASS:
2354      case OP_NCLASS:      case OP_NCLASS:
2355      ccode = code + 33;      ccode = code + PRIV(OP_lengths)[OP_CLASS];
2356    
2357  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2358      CHECK_CLASS_REPEAT:      CHECK_CLASS_REPEAT:
# Line 2166  for (code = first_significant_code(code Line 2427  for (code = first_significant_code(code
2427      case OP_TYPEUPTO:      case OP_TYPEUPTO:
2428      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
2429      case OP_TYPEPOSUPTO:      case OP_TYPEPOSUPTO:
2430      if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;      if (code[1 + IMM2_SIZE] == OP_PROP
2431          || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;
2432      break;      break;
2433    
2434      /* End of branch */      /* End of branch */
# Line 2194  for (code = first_significant_code(code Line 2456  for (code = first_significant_code(code
2456      case OP_MINQUERYI:      case OP_MINQUERYI:
2457      case OP_POSQUERY:      case OP_POSQUERY:
2458      case OP_POSQUERYI:      case OP_POSQUERYI:
2459      if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];      if (utf8 && code[1] >= 0xc0) code += PRIV(utf8_table4)[code[1] & 0x3f];
2460      break;      break;
2461    
2462      case OP_UPTO:      case OP_UPTO:
# Line 2203  for (code = first_significant_code(code Line 2465  for (code = first_significant_code(code
2465      case OP_MINUPTOI:      case OP_MINUPTOI:
2466      case OP_POSUPTO:      case OP_POSUPTO:
2467      case OP_POSUPTOI:      case OP_POSUPTOI:
2468      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];      if (utf8 && code[1 + IMM2_SIZE] >= 0xc0) code += PRIV(utf8_table4)[code[1 + IMM2_SIZE] & 0x3f];
2469      break;      break;
2470  #endif  #endif
2471    
# Line 2254  Returns:      TRUE if what is matched co Line 2516  Returns:      TRUE if what is matched co
2516  */  */
2517    
2518  static BOOL  static BOOL
2519  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
2520    BOOL utf8, compile_data *cd)    branch_chain *bcptr, BOOL utf8, compile_data *cd)
2521  {  {
2522  while (bcptr != NULL && bcptr->current_branch >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2523    {    {
# Line 2311  Returns:   TRUE or FALSE Line 2573  Returns:   TRUE or FALSE
2573  */  */
2574    
2575  static BOOL  static BOOL
2576  check_posix_syntax(const uschar *ptr, const uschar **endptr)  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
2577  {  {
2578  int terminator;          /* Don't combine these lines; the Solaris cc */  int terminator;          /* Don't combine these lines; the Solaris cc */
2579  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
# Line 2355  Returns:     a value representing the na Line 2617  Returns:     a value representing the na
2617  */  */
2618    
2619  static int  static int
2620  check_posix_name(const uschar *ptr, int len)  check_posix_name(const pcre_uchar *ptr, int len)
2621  {  {
2622  const char *pn = posix_names;  const char *pn = posix_names;
2623  register int yield = 0;  register int yield = 0;
2624  while (posix_name_lengths[yield] != 0)  while (posix_name_lengths[yield] != 0)
2625    {    {
2626    if (len == posix_name_lengths[yield] &&    if (len == posix_name_lengths[yield] &&
2627      strncmp((const char *)ptr, pn, len) == 0) return yield;      STRNCMP_UC_C8(ptr, pn, len) == 0) return yield;
2628    pn += posix_name_lengths[yield] + 1;    pn += posix_name_lengths[yield] + 1;
2629    yield++;    yield++;
2630    }    }
# Line 2402  Returns:     nothing Line 2664  Returns:     nothing
2664  */  */
2665    
2666  static void  static void
2667  adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf8, compile_data *cd,
2668    uschar *save_hwm)    pcre_uchar *save_hwm)
2669  {  {
2670  uschar *ptr = group;  pcre_uchar *ptr = group;
2671    
2672  while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf8)) != NULL)
2673    {    {
2674    int offset;    int offset;
2675    uschar *hc;    pcre_uchar *hc;
2676    
2677    /* See if this recursion is on the forward reference list. If so, adjust the    /* See if this recursion is on the forward reference list. If so, adjust the
2678    reference. */    reference. */
# Line 2455  Arguments: Line 2717  Arguments:
2717  Returns:         new code pointer  Returns:         new code pointer
2718  */  */
2719    
2720  static uschar *  static pcre_uchar *
2721  auto_callout(uschar *code, const uschar *ptr, compile_data *cd)  auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd)
2722  {  {
2723  *code++ = OP_CALLOUT;  *code++ = OP_CALLOUT;
2724  *code++ = 255;  *code++ = 255;
2725  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */
2726  PUT(code, LINK_SIZE, 0);                       /* Default length */  PUT(code, LINK_SIZE, 0);                       /* Default length */
2727  return code + 2*LINK_SIZE;  return code + 2 * LINK_SIZE;
2728  }  }
2729    
2730    
# Line 2484  Returns:             nothing Line 2746  Returns:             nothing
2746  */  */
2747    
2748  static void  static void
2749  complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)  complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd)
2750  {  {
2751  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
2752  PUT(previous_callout, 2 + LINK_SIZE, length);  PUT(previous_callout, 2 + LINK_SIZE, length);
# Line 2567  switch(ptype) Line 2829  switch(ptype)
2829            prop->chartype == ucp_Lt) == negated;            prop->chartype == ucp_Lt) == negated;
2830    
2831    case PT_GC:    case PT_GC:
2832    return (pdata == _pcre_ucp_gentype[prop->chartype]) == negated;    return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
2833    
2834    case PT_PC:    case PT_PC:
2835    return (pdata == prop->chartype) == negated;    return (pdata == prop->chartype) == negated;
# Line 2578  switch(ptype) Line 2840  switch(ptype)
2840    /* These are specials */    /* These are specials */
2841    
2842    case PT_ALNUM:    case PT_ALNUM:
2843    return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2844            _pcre_ucp_gentype[prop->chartype] == ucp_N) == negated;            PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
2845    
2846    case PT_SPACE:    /* Perl space */    case PT_SPACE:    /* Perl space */
2847    return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2848            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2849            == negated;            == negated;
2850    
2851    case PT_PXSPACE:  /* POSIX space */    case PT_PXSPACE:  /* POSIX space */
2852    return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2853            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2854            c == CHAR_FF || c == CHAR_CR)            c == CHAR_FF || c == CHAR_CR)
2855            == negated;            == negated;
2856    
2857    case PT_WORD:    case PT_WORD:
2858    return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||    return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2859            _pcre_ucp_gentype[prop->chartype] == ucp_N ||            PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2860            c == CHAR_UNDERSCORE) == negated;            c == CHAR_UNDERSCORE) == negated;
2861    }    }
2862  return FALSE;  return FALSE;
# Line 2622  Returns:        TRUE if possessifying is Line 2884  Returns:        TRUE if possessifying is
2884  */  */
2885    
2886  static BOOL  static BOOL
2887  check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr,  check_auto_possessive(const pcre_uchar *previous, BOOL utf8,
2888    int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
2889  {  {
2890  int c, next;  int c, next;
2891  int op_code = *previous++;  int op_code = *previous++;
# Line 2698  if ((options & PCRE_EXTENDED) != 0) Line 2960  if ((options & PCRE_EXTENDED) != 0)
2960  /* If the next thing is itself optional, we have to give up. */  /* If the next thing is itself optional, we have to give up. */
2961    
2962  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2963    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)    STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2964      return FALSE;      return FALSE;
2965    
2966  /* Now compare the next item with the previous opcode. First, handle cases when  /* Now compare the next item with the previous opcode. First, handle cases when
# Line 2960  switch(op_code) Line 3222  switch(op_code)
3222        to the original \d etc. At this point, ptr will point to a zero byte. */        to the original \d etc. At this point, ptr will point to a zero byte. */
3223    
3224        if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||        if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
3225          strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)          STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
3226            return FALSE;            return FALSE;
3227    
3228        /* Do the property check. */        /* Do the property check. */
# Line 3038  Arguments: Line 3300  Arguments:
3300    codeptr        points to the pointer to the current code point    codeptr        points to the pointer to the current code point
3301    ptrptr         points to the current pattern pointer    ptrptr         points to the current pattern pointer
3302    errorcodeptr   points to error code variable    errorcodeptr   points to error code variable
3303    firstbyteptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)    firstcharptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
3304    reqbyteptr     set to the last literal character required, else < 0    reqcharptr     set to the last literal character required, else < 0
3305    bcptr          points to current branch chain    bcptr          points to current branch chain
3306    cond_depth     conditional nesting depth    cond_depth     conditional nesting depth
3307    cd             contains pointers to tables etc.    cd             contains pointers to tables etc.
# Line 3051  Returns:         TRUE on success Line 3313  Returns:         TRUE on success
3313  */  */
3314    
3315  static BOOL  static BOOL
3316  compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,  compile_branch(int *optionsptr, pcre_uchar **codeptr,
3317    int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    const pcre_uchar **ptrptr, int *errorcodeptr, pcre_int32 *firstcharptr,
3318    int cond_depth, compile_data *cd, int *lengthptr)    pcre_int32 *reqcharptr, branch_chain *bcptr, int cond_depth,
3319      compile_data *cd, int *lengthptr)
3320  {  {
3321  int repeat_type, op_type;  int repeat_type, op_type;
3322  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
3323  int bravalue = 0;  int bravalue = 0;
3324  int greedy_default, greedy_non_default;  int greedy_default, greedy_non_default;
3325  int firstbyte, reqbyte;  pcre_int32 firstchar, reqchar;
3326  int zeroreqbyte, zerofirstbyte;  pcre_int32 zeroreqchar, zerofirstchar;
3327  int req_caseopt, reqvary, tempreqvary;  pcre_int32 req_caseopt, reqvary, tempreqvary;
3328  int options = *optionsptr;               /* May change dynamically */  int options = *optionsptr;               /* May change dynamically */
3329  int after_manual_callout = 0;  int after_manual_callout = 0;
3330  int length_prevgroup = 0;  int length_prevgroup = 0;
3331  register int c;  register int c;
3332  register uschar *code = *codeptr;  register pcre_uchar *code = *codeptr;
3333  uschar *last_code = code;  pcre_uchar *last_code = code;
3334  uschar *orig_code = code;  pcre_uchar *orig_code = code;
3335  uschar *tempcode;  pcre_uchar *tempcode;
3336  BOOL inescq = FALSE;  BOOL inescq = FALSE;
3337  BOOL groupsetfirstbyte = FALSE;  BOOL groupsetfirstchar = FALSE;
3338  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
3339  const uschar *tempptr;  const pcre_uchar *tempptr;
3340  const uschar *nestptr = NULL;  const pcre_uchar *nestptr = NULL;
3341  uschar *previous = NULL;  pcre_uchar *previous = NULL;
3342  uschar *previous_callout = NULL;  pcre_uchar *previous_callout = NULL;
3343  uschar *save_hwm = NULL;  pcre_uchar *save_hwm = NULL;
3344  uschar classbits[32];  pcre_uint8 classbits[32];
3345    
3346  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
3347  must not do this for other options (e.g. PCRE_EXTENDED) because they may change  must not do this for other options (e.g. PCRE_EXTENDED) because they may change
3348  dynamically as we process the pattern. */  dynamically as we process the pattern. */
3349    
3350  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
 BOOL class_utf8;  
3351  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
3352  uschar *class_utf8data;  pcre_uint8 utf8_char[6];
 uschar *class_utf8data_base;  
 uschar utf8_char[6];  
3353  #else  #else
3354  BOOL utf8 = FALSE;  BOOL utf8 = FALSE;
3355  #endif  #endif
3356    
3357    /* Helper variables for OP_XCLASS opcode (for characters > 255). */
3358    
3359    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3360    BOOL xclass;
3361    pcre_uchar *class_uchardata;
3362    pcre_uchar *class_uchardata_base;
3363    #endif
3364    
3365  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
3366  if (lengthptr != NULL) DPRINTF((">> start branch\n"));  if (lengthptr != NULL) DPRINTF((">> start branch\n"));
3367  #endif  #endif
# Line 3105  greedy_non_default = greedy_default ^ 1; Line 3373  greedy_non_default = greedy_default ^ 1;
3373    
3374  /* Initialize no first byte, no required byte. REQ_UNSET means "no char  /* Initialize no first byte, no required byte. REQ_UNSET means "no char
3375  matching encountered yet". It gets changed to REQ_NONE if we hit something that  matching encountered yet". It gets changed to REQ_NONE if we hit something that
3376  matches a non-fixed char first char; reqbyte just remains unset if we never  matches a non-fixed char first char; reqchar just remains unset if we never
3377  find one.  find one.
3378    
3379  When we hit a repeat whose minimum is zero, we may have to adjust these values  When we hit a repeat whose minimum is zero, we may have to adjust these values
3380  to take the zero repeat into account. This is implemented by setting them to  to take the zero repeat into account. This is implemented by setting them to
3381  zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual  zerofirstchar and zeroreqchar when such a repeat is encountered. The individual
3382  item types that can be repeated set these backoff variables appropriately. */  item types that can be repeated set these backoff variables appropriately. */
3383    
3384  firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET;  firstchar = reqchar = zerofirstchar = zeroreqchar = REQ_UNSET;
3385    
3386  /* The variable req_caseopt contains either the REQ_CASELESS value or zero,  /* The variable req_caseopt contains either the REQ_CASELESS value
3387  according to the current setting of the caseless flag. REQ_CASELESS is a bit  or zero, according to the current setting of the caseless flag. The
3388  value > 255. It is added into the firstbyte or reqbyte variables to record the  REQ_CASELESS value leaves the lower 28 bits empty. It is added into the
3389  case status of the value. This is used only for ASCII characters. */  firstchar or reqchar variables to record the case status of the
3390    value. This is used only for ASCII characters. */
3391    
3392  req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;  req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
3393    
3394  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
3395    
# Line 3138  for (;; ptr++) Line 3407  for (;; ptr++)
3407    int recno;    int recno;
3408    int refsign;    int refsign;
3409    int skipbytes;    int skipbytes;
3410    int subreqbyte;    int subreqchar;
3411    int subfirstbyte;    int subfirstchar;
3412    int terminator;    int terminator;
3413    int mclength;    int mclength;
3414    uschar mcbuffer[8];    int tempbracount;
3415      pcre_uchar mcbuffer[8];
3416    
3417    /* Get next byte in the pattern */    /* Get next byte in the pattern */
3418    
# Line 3189  for (;; ptr++) Line 3459  for (;; ptr++)
3459        }        }
3460    
3461      *lengthptr += (int)(code - last_code);      *lengthptr += (int)(code - last_code);
3462      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code - last_code),
3463          c));
3464    
3465      /* If "previous" is set and it is not at the start of the work space, move      /* If "previous" is set and it is not at the start of the work space, move
3466      it back to there, in order to avoid filling up the work space. Otherwise,      it back to there, in order to avoid filling up the work space. Otherwise,
# Line 3199  for (;; ptr++) Line 3470  for (;; ptr++)
3470        {        {
3471        if (previous > orig_code)        if (previous > orig_code)
3472          {          {
3473          memmove(orig_code, previous, code - previous);          memmove(orig_code, previous, IN_UCHARS(code - previous));
3474          code -= previous - orig_code;          code -= previous - orig_code;
3475          previous = orig_code;          previous = orig_code;
3476          }          }
# Line 3300  for (;; ptr++) Line 3571  for (;; ptr++)
3571      case 0:                        /* The branch terminates at string end */      case 0:                        /* The branch terminates at string end */
3572      case CHAR_VERTICAL_LINE:       /* or | or ) */      case CHAR_VERTICAL_LINE:       /* or | or ) */
3573      case CHAR_RIGHT_PARENTHESIS:      case CHAR_RIGHT_PARENTHESIS:
3574      *firstbyteptr = firstbyte;      *firstcharptr = firstchar;
3575      *reqbyteptr = reqbyte;      *reqcharptr = reqchar;
3576      *codeptr = code;      *codeptr = code;
3577      *ptrptr = ptr;      *ptrptr = ptr;
3578      if (lengthptr != NULL)      if (lengthptr != NULL)
# Line 3325  for (;; ptr++) Line 3596  for (;; ptr++)
3596      previous = NULL;      previous = NULL;
3597      if ((options & PCRE_MULTILINE) != 0)      if ((options & PCRE_MULTILINE) != 0)
3598        {        {
3599        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;        if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
3600        *code++ = OP_CIRCM;        *code++ = OP_CIRCM;
3601        }        }
3602      else *code++ = OP_CIRC;      else *code++ = OP_CIRC;
# Line 3337  for (;; ptr++) Line 3608  for (;; ptr++)
3608      break;      break;
3609    
3610      /* There can never be a first char if '.' is first, whatever happens about      /* There can never be a first char if '.' is first, whatever happens about
3611      repeats. The value of reqbyte doesn't change either. */      repeats. The value of reqchar doesn't change either. */
3612    
3613      case CHAR_DOT:      case CHAR_DOT:
3614      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;      if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
3615      zerofirstbyte = firstbyte;      zerofirstchar = firstchar;
3616      zeroreqbyte = reqbyte;      zeroreqchar = reqchar;
3617      previous = code;      previous = code;
3618      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
3619      break;      break;
# Line 3397  for (;; ptr++) Line 3668  for (;; ptr++)
3668          {          {
3669          if (ptr[1] == CHAR_E)          if (ptr[1] == CHAR_E)
3670            ptr++;            ptr++;
3671          else if (strncmp((const char *)ptr+1,          else if (STRNCMP_UC_C8(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0)
                           STR_Q STR_BACKSLASH STR_E, 3) == 0)  
3672            ptr += 3;            ptr += 3;
3673          else          else
3674            break;            break;
# Line 3417  for (;; ptr++) Line 3687  for (;; ptr++)
3687          (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)          (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
3688        {        {
3689        *code++ = negate_class? OP_ALLANY : OP_FAIL;        *code++ = negate_class? OP_ALLANY : OP_FAIL;
3690        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;        if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
3691        zerofirstbyte = firstbyte;        zerofirstchar = firstchar;
3692        break;        break;
3693        }        }
3694    
# Line 3440  for (;; ptr++) Line 3710  for (;; ptr++)
3710      than 256), because in that case the compiled code doesn't use the bit map.      than 256), because in that case the compiled code doesn't use the bit map.
3711      */      */
3712    
3713      memset(classbits, 0, 32 * sizeof(uschar));      memset(classbits, 0, 32 * sizeof(pcre_uint8));
3714    
3715  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3716      class_utf8 = FALSE;                       /* No chars >= 256 */      xclass = FALSE;                           /* No chars >= 256 */
3717      class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */      class_uchardata = code + LINK_SIZE + 2;   /* For UTF-8 items */
3718      class_utf8data_base = class_utf8data;     /* For resetting in pass 1 */      class_uchardata_base = class_uchardata;   /* For resetting in pass 1 */
3719  #endif  #endif
3720    
3721      /* Process characters until ] is reached. By writing this as a "do" it      /* Process characters until ] is reached. By writing this as a "do" it
# Line 3454  for (;; ptr++) Line 3724  for (;; ptr++)
3724    
3725      if (c != 0) do      if (c != 0) do
3726        {        {
3727        const uschar *oldptr;        const pcre_uchar *oldptr;
3728    
3729  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3730        if (utf8 && c > 127)        if (utf8 && c > 127)
3731          {                           /* Braces are required because the */          {                           /* Braces are required because the */
3732          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
3733          }          }
3734    #endif
3735    
3736        /* In the pre-compile phase, accumulate the length of any UTF-8 extra  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3737          /* In the pre-compile phase, accumulate the length of any extra
3738        data and reset the pointer. This is so that very large classes that        data and reset the pointer. This is so that very large classes that
3739        contain a zillion UTF-8 characters no longer overwrite the work space        contain a zillion > 255 characters no longer overwrite the work space
3740        (which is on the stack). */        (which is on the stack). */
3741    
3742        if (lengthptr != NULL)        if (lengthptr != NULL)
3743          {          {
3744          *lengthptr += class_utf8data - class_utf8data_base;          *lengthptr += class_uchardata - class_uchardata_base;
3745          class_utf8data = class_utf8data_base;          class_uchardata = class_uchardata_base;
3746          }          }
   
3747  #endif  #endif
3748    
3749        /* Inside \Q...\E everything is literal except \E */        /* Inside \Q...\E everything is literal except \E */
# Line 3500  for (;; ptr++) Line 3771  for (;; ptr++)
3771          {          {
3772          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3773          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3774          register const uschar *cbits = cd->cbits;          register const pcre_uint8 *cbits = cd->cbits;
3775          uschar pbits[32];          pcre_uint8 pbits[32];
3776    
3777          if (ptr[1] != CHAR_COLON)          if (ptr[1] != CHAR_COLON)
3778            {            {
# Line 3556  for (;; ptr++) Line 3827  for (;; ptr++)
3827          /* Copy in the first table (always present) */          /* Copy in the first table (always present) */
3828    
3829          memcpy(pbits, cbits + posix_class_maps[posix_class],          memcpy(pbits, cbits + posix_class_maps[posix_class],
3830            32 * sizeof(uschar));            32 * sizeof(pcre_uint8));
3831    
3832          /* If there is a second table, add or remove it as required. */          /* If there is a second table, add or remove it as required. */
3833    
# Line 3618  for (;; ptr++) Line 3889  for (;; ptr++)
3889    
3890          if (c < 0)          if (c < 0)
3891            {            {
3892            register const uschar *cbits = cd->cbits;            register const pcre_uint8 *cbits = cd->cbits;
3893            class_charcount += 2;     /* Greater than 1 is what matters */            class_charcount += 2;     /* Greater than 1 is what matters */
3894    
3895            switch (-c)            switch (-c)
# Line 3673  for (;; ptr++) Line 3944  for (;; ptr++)
3944              SETBIT(classbits, 0x09); /* VT */              SETBIT(classbits, 0x09); /* VT */
3945              SETBIT(classbits, 0x20); /* SPACE */              SETBIT(classbits, 0x20); /* SPACE */
3946              SETBIT(classbits, 0xa0); /* NSBP */              SETBIT(classbits, 0xa0); /* NSBP */
3947  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3948              if (utf8)              if (utf8)
3949                {                {
3950                class_utf8 = TRUE;                xclass = TRUE;
3951                *class_utf8data++ = XCL_SINGLE;                *class_uchardata++ = XCL_SINGLE;
3952                class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x1680, class_uchardata);
3953                *class_utf8data++ = XCL_SINGLE;                *class_uchardata++ = XCL_SINGLE;
3954                class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x180e, class_uchardata);
3955                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3956                class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2000, class_uchardata);
3957                class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x200A, class_uchardata);
3958                *class_utf8data++ = XCL_SINGLE;                *class_uchardata++ = XCL_SINGLE;
3959                class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x202f, class_uchardata);
3960                *class_utf8data++ = XCL_SINGLE;                *class_uchardata++ = XCL_SINGLE;
3961                class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x205f, class_uchardata);
3962                *class_utf8data++ = XCL_SINGLE;                *class_uchardata++ = XCL_SINGLE;
3963                class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x3000, class_uchardata);
3964                }                }
3965  #endif  #endif
3966              continue;              continue;
# Line 3708  for (;; ptr++) Line 3979  for (;; ptr++)
3979                classbits[c] |= x;                classbits[c] |= x;
3980                }                }
3981    
3982  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3983              if (utf8)              if (utf8)
3984                {                {
3985                class_utf8 = TRUE;                xclass = TRUE;
3986                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3987                class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x0100, class_uchardata);
3988                class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x167f, class_uchardata);
3989                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3990                class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x1681, class_uchardata);
3991                class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x180d, class_uchardata);
3992                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3993                class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x180f, class_uchardata);
3994                class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x1fff, class_uchardata);
3995                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3996                class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x200B, class_uchardata);
3997                class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x202e, class_uchardata);
3998                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
3999                class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2030, class_uchardata);
4000                class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x205e, class_uchardata);
4001                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
4002                class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2060, class_uchardata);
4003                class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2fff, class_uchardata);
4004                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
4005                class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x3001, class_uchardata);
4006                class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x7fffffff, class_uchardata);
4007                }                }
4008  #endif  #endif
4009              continue;              continue;
# Line 3743  for (;; ptr++) Line 4014  for (;; ptr++)
4014              SETBIT(classbits, 0x0c); /* FF */              SETBIT(classbits, 0x0c); /* FF */
4015              SETBIT(classbits, 0x0d); /* CR */              SETBIT(classbits, 0x0d); /* CR */
4016              SETBIT(classbits, 0x85); /* NEL */              SETBIT(classbits, 0x85); /* NEL */
4017  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4018              if (utf8)              if (utf8)
4019                {                {
4020                class_utf8 = TRUE;                xclass = TRUE;
4021                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
4022                class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2028, class_uchardata);
4023                class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2029, class_uchardata);
4024                }                }
4025  #endif  #endif
4026              continue;              continue;
# Line 3771  for (;; ptr++) Line 4042  for (;; ptr++)
4042                classbits[c] |= x;                classbits[c] |= x;
4043                }                }
4044    
4045  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4046              if (utf8)              if (utf8)
4047                {                {
4048                class_utf8 = TRUE;                xclass = TRUE;
4049                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
4050                class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x0100, class_uchardata);
4051                class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2027, class_uchardata);
4052                *class_utf8data++ = XCL_RANGE;                *class_uchardata++ = XCL_RANGE;
4053                class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x2029, class_uchardata);
4054                class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);                class_uchardata += PRIV(ord2utf8)(0x7fffffff, class_uchardata);
4055                }                }
4056  #endif  #endif
4057              continue;              continue;
# Line 3793  for (;; ptr++) Line 4064  for (;; ptr++)
4064                int pdata;                int pdata;
4065                int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);                int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
4066                if (ptype < 0) goto FAILED;                if (ptype < 0) goto FAILED;
4067                class_utf8 = TRUE;                xclass = TRUE;
4068                *class_utf8data++ = ((-c == ESC_p) != negated)?                *class_uchardata++ = ((-c == ESC_p) != negated)?
4069                  XCL_PROP : XCL_NOTPROP;                  XCL_PROP : XCL_NOTPROP;
4070                *class_utf8data++ = ptype;                *class_uchardata++ = ptype;
4071                *class_utf8data++ = pdata;                *class_uchardata++ = pdata;
4072                class_charcount -= 2;   /* Not a < 256 character */                class_charcount -= 2;   /* Not a < 256 character */
4073                continue;                continue;
4074                }                }
# Line 3819  for (;; ptr++) Line 4090  for (;; ptr++)
4090            }            }
4091    
4092          /* Fall through if we have a single character (c >= 0). This may be          /* Fall through if we have a single character (c >= 0). This may be
4093          greater than 256 in UTF-8 mode. */          greater than 255 in UTF or 16-bit mode. */
4094    
4095          }   /* End of backslash handling */          }   /* End of backslash handling */
4096    
# Line 3917  for (;; ptr++) Line 4188  for (;; ptr++)
4188          matching for characters > 127 is available only if UCP support is          matching for characters > 127 is available only if UCP support is
4189          available. */          available. */
4190    
4191  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4192          if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))          if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
4193    #endif
4194    #ifndef COMPILE_PCRE8
4195            if (d > 255)
4196    #endif
4197    #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4198            {            {
4199            class_utf8 = TRUE;            xclass = TRUE;
4200    
4201            /* With UCP support, we can find the other case equivalents of            /* With UCP support, we can find the other case equivalents of
4202            the relevant characters. There may be several ranges. Optimize how            the relevant characters. There may be several ranges. Optimize how
# Line 3953  for (;; ptr++) Line 4229  for (;; ptr++)
4229    
4230                if (occ == ocd)                if (occ == ocd)
4231                  {                  {
4232                  *class_utf8data++ = XCL_SINGLE;                  *class_uchardata++ = XCL_SINGLE;
4233                  }                  }
4234                else                else
4235                  {                  {
4236                  *class_utf8data++ = XCL_RANGE;                  *class_uchardata++ = XCL_RANGE;
4237                  class_utf8data += _pcre_ord2utf8(occ, class_utf8data);                  class_uchardata += PRIV(ord2utf8)(occ, class_uchardata);
4238                  }                  }
4239                class_utf8data += _pcre_ord2utf8(ocd, class_utf8data);                class_uchardata += PRIV(ord2utf8)(ocd, class_uchardata);
4240                }                }
4241              }              }
4242  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 3968  for (;; ptr++) Line 4244  for (;; ptr++)
4244            /* Now record the original range, possibly modified for UCP caseless            /* Now record the original range, possibly modified for UCP caseless
4245            overlapping ranges. */            overlapping ranges. */
4246    
4247            *class_utf8data++ = XCL_RANGE;            *class_uchardata++ = XCL_RANGE;
4248            class_utf8data += _pcre_ord2utf8(c, class_utf8data);  #ifdef SUPPORT_UTF
4249            class_utf8data += _pcre_ord2utf8(d, class_utf8data);            class_uchardata += PRIV(ord2utf8)(c, class_uchardata);
4250              class_uchardata += PRIV(ord2utf8)(d, class_uchardata);
4251    #else
4252              *class_uchardata++ = c;
4253              *class_uchardata++ = d;
4254    #endif
4255    
4256            /* With UCP support, we are done. Without UCP support, there is no            /* With UCP support, we are done. Without UCP support, there is no
4257            caseless matching for UTF-8 characters > 127; we can use the bit map            caseless matching for UTF characters > 127; we can use the bit map
4258            for the smaller ones. */            for the smaller ones. As for 16 bit characters without UTF, we
4259              can still use the bit map for the part of the range that lies within 0-255. */
4260    
4261  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4262            continue;    /* With next character in the class */            continue;    /* With next character in the class */
4263  #else  #else
4264    #ifdef SUPPORT_UTF
4265            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
   
4266            /* Adjust upper limit and fall through to set up the map */            /* Adjust upper limit and fall through to set up the map */
   
4267            d = 127;            d = 127;
4268    #else
4269              if (c > 255) continue;
4270              /* Adjust upper limit and fall through to set up the map */
4271              d = 255;
4272    #endif  /* SUPPORT_UTF */
4273  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
4274            }            }
4275  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */
4276    
4277          /* We use the bit map for all cases when not in UTF-8 mode; else          /* We use the bit map for 8 bit mode, or when the characters fall
4278          ranges that lie entirely within 0-127 when there is UCP support; else          partially or entirely to [0-255] ([0-127] for UCP) ranges. */
         for partial ranges without UCP support. */  
4279    
4280          class_charcount += d - c + 1;          class_charcount += d - c + 1;
4281          class_lastchar = d;          class_lastchar = d;
# Line 4019  for (;; ptr++) Line 4303  for (;; ptr++)
4303    
4304        /* Handle a character that cannot go in the bit map */        /* Handle a character that cannot go in the bit map */
4305    
4306  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4307        if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))        if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
4308    #endif
4309    #ifndef COMPILE_PCRE8
4310          if (c > 255)
4311    #endif
4312    #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4313          {          {
4314          class_utf8 = TRUE;          xclass = TRUE;
4315          *class_utf8data++ = XCL_SINGLE;          *class_uchardata++ = XCL_SINGLE;
4316          class_utf8data += _pcre_ord2utf8(c, class_utf8data);  #ifdef SUPPORT_UTF
4317            class_uchardata += PRIV(ord2utf8)(c, class_uchardata);
4318    #else
4319            *class_uchardata++ = c;
4320    #endif
4321    
4322  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4323          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
# Line 4032  for (;; ptr++) Line 4325  for (;; ptr++)
4325            unsigned int othercase;            unsigned int othercase;
4326            if ((othercase = UCD_OTHERCASE(c)) != c)            if ((othercase = UCD_OTHERCASE(c)) != c)
4327              {              {
4328              *class_utf8data++ = XCL_SINGLE;              *class_uchardata++ = XCL_SINGLE;
4329              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);              class_uchardata += PRIV(ord2utf8)(othercase, class_uchardata);
4330              }              }
4331            }            }
4332  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 4085  for (;; ptr++) Line 4378  for (;; ptr++)
4378      The optimization throws away the bit map. We turn the item into a      The optimization throws away the bit map. We turn the item into a
4379      1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.      1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
4380      Note that OP_NOT[I] does not support multibyte characters. In the positive      Note that OP_NOT[I] does not support multibyte characters. In the positive
4381      case, it can cause firstbyte to be set. Otherwise, there can be no first      case, it can cause firstchar to be set. Otherwise, there can be no first
4382      char if this item is first, whatever repeat count may follow. In the case      char if this item is first, whatever repeat count may follow. In the case
4383      of reqbyte, save the previous value for reinstating. */      of reqchar, save the previous value for reinstating. */
4384    
4385  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4386      if (class_charcount == 1 && !class_utf8 &&      if (class_charcount == 1 && !xclass &&
4387        (!utf8 || !negate_class || class_lastchar < 128))        (!utf8 || !negate_class || class_lastchar < 128))
4388  #else  #elif defined COMPILE_PCRE8
4389      if (class_charcount == 1)      if (class_charcount == 1)
4390    #else
4391        if (class_charcount == 1 && !xclass)
4392  #endif  #endif
4393        {        {
4394        zeroreqbyte = reqbyte;        zeroreqchar = reqchar;
4395    
4396        /* The OP_NOT[I] opcodes work on one-byte characters only. */        /* The OP_NOT[I] opcodes work on one-byte characters only. */
4397    
4398        if (negate_class)        if (negate_class)
4399          {          {
4400          if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
4401          zerofirstbyte = firstbyte;          zerofirstchar = firstchar;
4402          *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;          *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
4403          *code++ = class_lastchar;          *code++ = class_lastchar;
4404          break;          break;
# Line 4114  for (;; ptr++) Line 4409  for (;; ptr++)
4409    
4410  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4411        if (utf8 && class_lastchar > 127)        if (utf8 && class_lastchar > 127)
4412          mclength = _pcre_ord2utf8(class_lastchar, mcbuffer);          mclength = PRIV(ord2utf8)(class_lastchar, mcbuffer);
4413        else        else
4414  #endif  #endif
4415          {          {
# Line 4126  for (;; ptr++) Line 4421  for (;; ptr++)
4421    
4422      /* The general case - not the one-char optimization. If this is the first      /* The general case - not the one-char optimization. If this is the first
4423      thing in the branch, there can be no first char setting, whatever the      thing in the branch, there can be no first char setting, whatever the
4424      repeat count. Any reqbyte setting must remain unchanged after any kind of      repeat count. Any reqchar setting must remain unchanged after any kind of
4425      repeat. */      repeat. */
4426    
4427      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;      if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
4428      zerofirstbyte = firstbyte;      zerofirstchar = firstchar;
4429      zeroreqbyte = reqbyte;      zeroreqchar = reqchar;
4430    
4431      /* If there are characters with values > 255, we have to compile an      /* If there are characters with values > 255, we have to compile an
4432      extended class, with its own opcode, unless there was a negated special      extended class, with its own opcode, unless there was a negated special
# Line 4141  for (;; ptr++) Line 4436  for (;; ptr++)
4436      be listed) there are no characters < 256, we can omit the bitmap in the      be listed) there are no characters < 256, we can omit the bitmap in the
4437      actual compiled code. */      actual compiled code. */
4438    
4439  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4440      if (class_utf8 && (!should_flip_negation || (options & PCRE_UCP) != 0))      if (xclass && (!should_flip_negation || (options & PCRE_UCP) != 0))
4441    #endif
4442    #ifndef COMPILE_PCRE8
4443        if (xclass && !should_flip_negation)
4444    #endif
4445    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4446        {        {
4447        *class_utf8data++ = XCL_END;    /* Marks the end of extra data */        *class_uchardata++ = XCL_END;    /* Marks the end of extra data */
4448        *code++ = OP_XCLASS;        *code++ = OP_XCLASS;
4449        code += LINK_SIZE;        code += LINK_SIZE;
4450        *code = negate_class? XCL_NOT : 0;        *code = negate_class? XCL_NOT:0;
4451    
4452        /* If the map is required, move up the extra data to make room for it;        /* If the map is required, move up the extra data to make room for it;
4453        otherwise just move the code pointer to the end of the extra data. */        otherwise just move the code pointer to the end of the extra data. */
# Line 4155  for (;; ptr++) Line 4455  for (;; ptr++)
4455        if (class_charcount > 0)        if (class_charcount > 0)
4456          {          {
4457          *code++ |= XCL_MAP;          *code++ |= XCL_MAP;
4458          memmove(code + 32, code, class_utf8data - code);          memmove(code + (32 / sizeof(pcre_uchar)), code,
4459              IN_UCHARS(class_uchardata - code));
4460          memcpy(code, classbits, 32);          memcpy(code, classbits, 32);
4461          code = class_utf8data + 32;          code = class_uchardata + (32 / sizeof(pcre_uchar));
4462          }          }
4463        else code = class_utf8data;        else code = class_uchardata;
4464    
4465        /* Now fill in the complete length of the item */        /* Now fill in the complete length of the item */
4466    
# Line 4175  for (;; ptr++) Line 4476  for (;; ptr++)
4476      negating it if necessary. */      negating it if necessary. */
4477    
4478      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
4479      if (negate_class)      if (lengthptr == NULL)    /* Save time in the pre-compile phase */
       {  
       if (lengthptr == NULL)    /* Save time in the pre-compile phase */  
         for (c = 0; c < 32; c++) code[c] = ~classbits[c];  
       }  
     else  
4480        {        {
4481          if (negate_class)
4482            for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
4483        memcpy(code, classbits, 32);        memcpy(code, classbits, 32);
4484        }        }
4485      code += 32;      code += 32 / sizeof(pcre_uchar);
4486      break;      break;
4487    
4488    
# Line 4221  for (;; ptr++) Line 4519  for (;; ptr++)
4519    
4520      if (repeat_min == 0)      if (repeat_min == 0)
4521        {        {
4522        firstbyte = zerofirstbyte;    /* Adjust for zero repeat */        firstchar = zerofirstchar;    /* Adjust for zero repeat */
4523        reqbyte = zeroreqbyte;        /* Ditto */        reqchar = zeroreqchar;        /* Ditto */
4524        }        }
4525    
4526      /* Remember whether this is a variable length repeat */      /* Remember whether this is a variable length repeat */
# Line 4261  for (;; ptr++) Line 4559  for (;; ptr++)
4559      past, but it no longer happens for non-repeated recursions. In fact, the      past, but it no longer happens for non-repeated recursions. In fact, the
4560      repeated ones could be re-implemented independently so as not to need this,      repeated ones could be re-implemented independently so as not to need this,
4561      but for the moment we rely on the code for repeating groups. */      but for the moment we rely on the code for repeating groups. */
4562    
4563      if (*previous == OP_RECURSE)      if (*previous == OP_RECURSE)
4564        {        {
4565        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);        memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE));
4566        *previous = OP_ONCE;        *previous = OP_ONCE;
4567        PUT(previous, 1, 2 + 2*LINK_SIZE);        PUT(previous, 1, 2 + 2*LINK_SIZE);
4568        previous[2 + 2*LINK_SIZE] = OP_KET;        previous[2 + 2*LINK_SIZE] = OP_KET;
# Line 4287  for (;; ptr++) Line 4585  for (;; ptr++)
4585    
4586      /* If previous was a character match, abolish the item and generate a      /* If previous was a character match, abolish the item and generate a
4587      repeat item instead. If a char item has a minimum of more than one, ensure      repeat item instead. If a char item has a minimum of more than one, ensure
4588      that it is set in reqbyte - it might not be if a sequence such as x{3} is      that it is set in reqchar - it might not be if a sequence such as x{3} is
4589      the first thing in a branch because the x will have gone into firstbyte      the first thing in a branch because the x will have gone into firstchar
4590      instead.  */      instead.  */
4591    
4592      if (*previous == OP_CHAR || *previous == OP_CHARI)      if (*previous == OP_CHAR || *previous == OP_CHARI)
# Line 4303  for (;; ptr++) Line 4601  for (;; ptr++)
4601  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4602        if (utf8 && (code[-1] & 0x80) != 0)        if (utf8 && (code[-1] & 0x80) != 0)
4603          {          {
4604          uschar *lastchar = code - 1;          pcre_uchar *lastchar = code - 1;
4605          while((*lastchar & 0xc0) == 0x80) lastchar--;          while((*lastchar & 0xc0) == 0x80) lastchar--;
4606          c = code - lastchar;            /* Length of UTF-8 character */          c = code - lastchar;            /* Length of UTF-8 character */
4607          memcpy(utf8_char, lastchar, c); /* Save the char */          memcpy(utf8_char, lastchar, c); /* Save the char */
# Line 4317  for (;; ptr++) Line 4615  for (;; ptr++)
4615    
4616          {          {
4617          c = code[-1];          c = code[-1];
4618          if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt;          if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
4619          }          }
4620    
4621        /* If the repetition is unlimited, it pays to see if the next thing on        /* If the repetition is unlimited, it pays to see if the next thing on
# Line 4365  for (;; ptr++) Line 4663  for (;; ptr++)
4663    
4664      else if (*previous < OP_EODN)      else if (*previous < OP_EODN)
4665        {        {
4666        uschar *oldcode;        pcre_uchar *oldcode;
4667        int prop_type, prop_value;        int prop_type, prop_value;
4668        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
4669        c = *previous;        c = *previous;
# Line 4538  for (;; ptr++) Line 4836  for (;; ptr++)
4836    
4837      else if (*previous == OP_CLASS ||      else if (*previous == OP_CLASS ||
4838               *previous == OP_NCLASS ||               *previous == OP_NCLASS ||
4839  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8
4840               *previous == OP_XCLASS ||               *previous == OP_XCLASS ||
4841  #endif  #endif
4842               *previous == OP_REF ||               *previous == OP_REF ||
# Line 4587  for (;; ptr++) Line 4885  for (;; ptr++)
4885        {        {
4886        register int i;        register int i;
4887        int len = (int)(code - previous);        int len = (int)(code - previous);
4888        uschar *bralink = NULL;        pcre_uchar *bralink = NULL;
4889        uschar *brazeroptr = NULL;        pcre_uchar *brazeroptr = NULL;
4890    
4891        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
4892        we just ignore the repeat. */        we just ignore the repeat. */
# Line 4642  for (;; ptr++) Line 4940  for (;; ptr++)
4940            {            {
4941            *code = OP_END;            *code = OP_END;
4942            adjust_recurse(previous, 1, utf8, cd, save_hwm);            adjust_recurse(previous, 1, utf8, cd, save_hwm);
4943            memmove(previous+1, previous, len);            memmove(previous + 1, previous, IN_UCHARS(len));
4944            code++;            code++;
4945            if (repeat_max == 0)            if (repeat_max == 0)
4946              {              {
# Line 4666  for (;; ptr++) Line 4964  for (;; ptr++)
4964            int offset;            int offset;
4965            *code = OP_END;            *code = OP_END;
4966            adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);            adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);
4967            memmove(previous + 2 + LINK_SIZE, previous, len);            memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
4968            code += 2 + LINK_SIZE;            code += 2 + LINK_SIZE;
4969            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
4970            *previous++ = OP_BRA;            *previous++ = OP_BRA;
# Line 4716  for (;; ptr++) Line 5014  for (;; ptr++)
5014    
5015            else            else
5016              {              {
5017              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;              if (groupsetfirstchar && reqchar < 0) reqchar = firstchar;
5018              for (i = 1; i < repeat_min; i++)              for (i = 1; i < repeat_min; i++)
5019                {                {
5020                uschar *hc;                pcre_uchar *hc;
5021                uschar *this_hwm = cd->hwm;                pcre_uchar *this_hwm = cd->hwm;
5022                memcpy(code, previous, len);                memcpy(code, previous, IN_UCHARS(len));
5023                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
5024                  {                  {
5025                  PUT(cd->hwm, 0, GET(hc, 0) + len);                  PUT(cd->hwm, 0, GET(hc, 0) + len);
# Line 4771  for (;; ptr++) Line 5069  for (;; ptr++)
5069    
5070          else for (i = repeat_max - 1; i >= 0; i--)          else for (i = repeat_max - 1; i >= 0; i--)
5071            {            {
5072            uschar *hc;            pcre_uchar *hc;
5073            uschar *this_hwm = cd->hwm;            pcre_uchar *this_hwm = cd->hwm;
5074    
5075            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
5076    
# Line 4788  for (;; ptr++) Line 5086  for (;; ptr++)
5086              PUTINC(code, 0, offset);              PUTINC(code, 0, offset);
5087              }              }
5088    
5089            memcpy(code, previous, len);            memcpy(code, previous, IN_UCHARS(len));
5090            for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)            for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
5091              {              {
5092              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
# Line 4805  for (;; ptr++) Line 5103  for (;; ptr++)
5103            {            {
5104            int oldlinkoffset;            int oldlinkoffset;
5105            int offset = (int)(code - bralink + 1);            int offset = (int)(code - bralink + 1);
5106            uschar *bra = code - offset;            pcre_uchar *bra = code - offset;
5107            oldlinkoffset = GET(bra, 1);            oldlinkoffset = GET(bra, 1);
5108            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
5109            *code++ = OP_KET;            *code++ = OP_KET;
# Line 4819  for (;; ptr++) Line 5117  for (;; ptr++)
5117        ONCE brackets can be converted into non-capturing brackets, as the        ONCE brackets can be converted into non-capturing brackets, as the
5118        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
5119        deal with possessive ONCEs specially.        deal with possessive ONCEs specially.
5120    
5121        Otherwise, if the quantifier was possessive, we convert the BRA code to        Otherwise, when we are doing the actual compile phase, check to see
5122        the POS form, and the KET code to KETRPOS. (It turns out to be convenient        whether this group is one that could match an empty string. If so,
5123        at runtime to detect this kind of subpattern at both the start and at the        convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
5124        end.) The use of special opcodes makes it possible to reduce greatly the        that runtime checking can be done. [This check is also applied to ONCE
5125        stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO,        groups at runtime, but in a different way.]
5126        convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that  
5127        the default action below, of wrapping everything inside atomic brackets,        Then, if the quantifier was possessive and the bracket is not a
5128        does not happen.        conditional, we convert the BRA code to the POS form, and the KET code to
5129          KETRPOS. (It turns out to be convenient at runtime to detect this kind of
5130        Then, when we are doing the actual compile phase, check to see whether        subpattern at both the start and at the end.) The use of special opcodes
5131        this group is one that could match an empty string. If so, convert the        makes it possible to reduce greatly the stack usage in pcre_exec(). If
5132        initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
5133        checking can be done. [This check is also applied to ONCE groups at  
5134        runtime, but in a different way.] */        Then, if the minimum number of matches is 1 or 0, cancel the possessive
5135          flag so that the default action below, of wrapping everything inside
5136          atomic brackets, does not happen. When the minimum is greater than 1,
5137          there will be earlier copies of the group, and so we still have to wrap
5138          the whole thing. */
5139    
5140        else        else
5141          {          {
5142          uschar *ketcode = code - 1 - LINK_SIZE;          pcre_uchar *ketcode = code - 1 - LINK_SIZE;
5143          uschar *bracode = ketcode - GET(ketcode, 1);          pcre_uchar *bracode = ketcode - GET(ketcode, 1);
5144    
5145          if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;          /* Convert possessive ONCE brackets to non-capturing */
5146          if (*bracode == OP_ONCE)  
5147            if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
5148                possessive_quantifier) *bracode = OP_BRA;
5149    
5150            /* For non-possessive ONCE brackets, all we need to do is to
5151            set the KET. */
5152    
5153            if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
5154            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
5155    
5156            /* Handle non-ONCE brackets and possessive ONCEs (which have been
5157            converted to non-capturing above). */
5158    
5159          else          else
5160            {            {
5161            if (possessive_quantifier)            /* In the compile phase, check for empty string matching. */
5162              {  
             *bracode += 1;                   /* Switch to xxxPOS opcodes */  
             *ketcode = OP_KETRPOS;  
             if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;  
             possessive_quantifier = FALSE;  
             }  
           else *ketcode = OP_KETRMAX + repeat_type;  
   
5163            if (lengthptr == NULL)            if (lengthptr == NULL)
5164              {              {
5165              uschar *scode = bracode;              pcre_uchar *scode = bracode;
5166              do              do
5167                {                {
5168                if (could_be_empty_branch(scode, ketcode, utf8, cd))                if (could_be_empty_branch(scode, ketcode, utf8, cd))
# Line 4868  for (;; ptr++) Line 5174  for (;; ptr++)
5174                }                }
5175              while (*scode == OP_ALT);              while (*scode == OP_ALT);
5176              }              }
5177    
5178              /* Handle possessive quantifiers. */
5179    
5180              if (possessive_quantifier)
5181                {
5182                /* For COND brackets, we wrap the whole thing in a possessively
5183                repeated non-capturing bracket, because we have not invented POS
5184                versions of the COND opcodes. Because we are moving code along, we
5185                must ensure that any pending recursive references are updated. */
5186    
5187                if (*bracode == OP_COND || *bracode == OP_SCOND)
5188                  {
5189                  int nlen = (int)(code - bracode);
5190                  *code = OP_END;
5191                  adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5192                  memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
5193                  code += 1 + LINK_SIZE;
5194                  nlen += 1 + LINK_SIZE;
5195                  *bracode = OP_BRAPOS;
5196                  *code++ = OP_KETRPOS;
5197                  PUTINC(code, 0, nlen);
5198                  PUT(bracode, 1, nlen);
5199                  }
5200    
5201                /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
5202    
5203                else
5204                  {
5205                  *bracode += 1;              /* Switch to xxxPOS opcodes */
5206                  *ketcode = OP_KETRPOS;
5207                  }
5208    
5209                /* If the minimum is zero, mark it as possessive, then unset the
5210                possessive flag when the minimum is 0 or 1. */
5211    
5212                if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
5213                if (repeat_min < 2) possessive_quantifier = FALSE;
5214                }
5215    
5216              /* Non-possessive quantifier */
5217    
5218              else *ketcode = OP_KETRMAX + repeat_type;
5219            }            }
5220          }          }
5221        }        }
# Line 4894  for (;; ptr++) Line 5242  for (;; ptr++)
5242      notation is just syntactic sugar, taken from Sun's Java package, but the      notation is just syntactic sugar, taken from Sun's Java package, but the
5243      special opcodes can optimize it.      special opcodes can optimize it.
5244    
5245      Possessively repeated subpatterns have already been handled in the code      Some (but not all) possessively repeated subpatterns have already been
5246      just above, so possessive_quantifier is always FALSE for them at this      completely handled in the code just above. For them, possessive_quantifier
5247      stage.      is always FALSE at this stage.
5248    
5249      Note that the repeated item starts at tempcode, not at previous, which      Note that the repeated item starts at tempcode, not at previous, which
5250      might be the first part of a string whose (former) last char we repeated.      might be the first part of a string whose (former) last char we repeated.
# Line 4910  for (;; ptr++) Line 5258  for (;; ptr++)
5258        int len;        int len;
5259    
5260        if (*tempcode == OP_TYPEEXACT)        if (*tempcode == OP_TYPEEXACT)
5261          tempcode += _pcre_OP_lengths[*tempcode] +          tempcode += PRIV(OP_lengths)[*tempcode] +
5262            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);            ((tempcode[1 + IMM2_SIZE] == OP_PROP
5263              || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
5264    
5265        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
5266          {          {
5267          tempcode += _pcre_OP_lengths[*tempcode];          tempcode += PRIV(OP_lengths)[*tempcode];
5268  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5269          if (utf8 && tempcode[-1] >= 0xc0)          if (utf8 && tempcode[-1] >= 0xc0)
5270            tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];            tempcode += PRIV(utf8_table4)[tempcode[-1] & 0x3f];
5271  #endif  #endif
5272          }          }
5273    
# Line 4956  for (;; ptr++) Line 5305  for (;; ptr++)
5305          default:          default:
5306          *code = OP_END;          *code = OP_END;
5307          adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);          adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5308          memmove(tempcode + 1+LINK_SIZE, tempcode, len);          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
5309          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
5310          len += 1 + LINK_SIZE;          len += 1 + LINK_SIZE;
5311          tempcode[0] = OP_ONCE;          tempcode[0] = OP_ONCE;
# Line 4968  for (;; ptr++) Line 5317  for (;; ptr++)
5317        }        }
5318    
5319      /* In all cases we no longer have a previous item. We also set the      /* In all cases we no longer have a previous item. We also set the
5320      "follows varying string" flag for subsequently encountered reqbytes if      "follows varying string" flag for subsequently encountered reqchars if
5321      it isn't already set and we have just passed a varying length item. */      it isn't already set and we have just passed a varying length item. */
5322    
5323      END_REPEAT:      END_REPEAT:
# Line 4997  for (;; ptr++) Line 5346  for (;; ptr++)
5346        int i, namelen;        int i, namelen;
5347        int arglen = 0;        int arglen = 0;
5348        const char *vn = verbnames;        const char *vn = verbnames;
5349        const uschar *name = ptr + 1;        const pcre_uchar *name = ptr + 1;
5350        const uschar *arg = NULL;        const pcre_uchar *arg = NULL;
5351        previous = NULL;        previous = NULL;
5352        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
5353        namelen = (int)(ptr - name);        namelen = (int)(ptr - name);
# Line 5025  for (;; ptr++) Line 5374  for (;; ptr++)
5374        for (i = 0; i < verbcount; i++)        for (i = 0; i < verbcount; i++)
5375          {          {
5376          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
5377              strncmp((char *)name, vn, namelen) == 0)              STRNCMP_UC_C8(name, vn, namelen) == 0)
5378            {            {
5379            /* Check for open captures before ACCEPT and convert it to            /* Check for open captures before ACCEPT and convert it to
5380            ASSERT_ACCEPT if in an assertion. */            ASSERT_ACCEPT if in an assertion. */
# Line 5046  for (;; ptr++) Line 5395  for (;; ptr++)
5395                }                }
5396              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
5397    
5398              /* Do not set firstbyte after *ACCEPT */              /* Do not set firstchar after *ACCEPT */
5399              if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;              if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
5400              }              }
5401    
5402            /* Handle other cases with/without an argument */            /* Handle other cases with/without an argument */
# Line 5073  for (;; ptr++) Line 5422  for (;; ptr++)
5422              *code = verbs[i].op_arg;              *code = verbs[i].op_arg;
5423              if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;              if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;
5424              *code++ = arglen;              *code++ = arglen;
5425              memcpy(code, arg, arglen);              memcpy(code, arg, IN_UCHARS(arglen));
5426              code += arglen;              code += arglen;
5427              *code++ = 0;              *code++ = 0;
5428              }              }
# Line 5096  for (;; ptr++) Line 5445  for (;; ptr++)
5445        {        {
5446        int i, set, unset, namelen;        int i, set, unset, namelen;
5447        int *optset;        int *optset;
5448        const uschar *name;        const pcre_uchar *name;
5449        uschar *slot;        pcre_uchar *slot;
5450    
5451        switch (*(++ptr))        switch (*(++ptr))
5452          {          {
# Line 5150  for (;; ptr++) Line 5499  for (;; ptr++)
5499            break;            break;
5500    
5501          /* Most other conditions use OP_CREF (a couple change to OP_RREF          /* Most other conditions use OP_CREF (a couple change to OP_RREF
5502          below), and all need to skip 3 bytes at the start of the group. */          below), and all need to skip 1+IMM2_SIZE bytes at the start of the group. */
5503    
5504          code[1+LINK_SIZE] = OP_CREF;          code[1+LINK_SIZE] = OP_CREF;
5505          skipbytes = 3;          skipbytes = 1+IMM2_SIZE;
5506          refsign = -1;          refsign = -1;
5507    
5508          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
# Line 5200  for (;; ptr++) Line 5549  for (;; ptr++)
5549          while ((cd->ctypes[*ptr] & ctype_word) != 0)          while ((cd->ctypes[*ptr] & ctype_word) != 0)
5550            {            {
5551            if (recno >= 0)            if (recno >= 0)
5552              recno = ((digitab[*ptr] & ctype_digit) != 0)?              recno = (IS_DIGIT(*ptr))? recno * 10 + *ptr - CHAR_0 : -1;
               recno * 10 + *ptr - CHAR_0 : -1;  
5553            ptr++;            ptr++;
5554            }            }
5555          namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
# Line 5249  for (;; ptr++) Line 5597  for (;; ptr++)
5597          slot = cd->name_table;          slot = cd->name_table;
5598          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
5599            {            {
5600            if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;            if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break;
5601            slot += cd->name_entry_size;            slot += cd->name_entry_size;
5602            }            }
5603    
# Line 5291  for (;; ptr++) Line 5639  for (;; ptr++)
5639            recno = 0;            recno = 0;
5640            for (i = 1; i < namelen; i++)            for (i = 1; i < namelen; i++)
5641              {              {
5642              if ((digitab[name[i]] & ctype_digit) == 0)              if (!IS_DIGIT(name[i]))
5643                {                {
5644                *errorcodeptr = ERR15;                *errorcodeptr = ERR15;
5645                goto FAILED;                goto FAILED;
# Line 5306  for (;; ptr++) Line 5654  for (;; ptr++)
5654          /* Similarly, check for the (?(DEFINE) "condition", which is always          /* Similarly, check for the (?(DEFINE) "condition", which is always
5655          false. */          false. */
5656    
5657          else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)          else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0)
5658            {            {
5659            code[1+LINK_SIZE] = OP_DEF;            code[1+LINK_SIZE] = OP_DEF;
5660            skipbytes = 1;            skipbytes = 1;
# Line 5386  for (;; ptr++) Line 5734  for (;; ptr++)
5734    
5735          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5736          case CHAR_C:                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
5737          previous_callout = code;  /* Save for later completion */          previous_callout = code;     /* Save for later completion */
5738          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1;    /* Skip one item before completing */
5739          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
5740            {            {
5741            int n = 0;            int n = 0;
5742            while ((digitab[*(++ptr)] & ctype_digit) != 0)            ptr++;
5743              n = n * 10 + *ptr - CHAR_0;            while(IS_DIGIT(*ptr))
5744                n = n * 10 + *ptr++ - CHAR_0;
5745            if (*ptr != CHAR_RIGHT_PARENTHESIS)            if (*ptr != CHAR_RIGHT_PARENTHESIS)
5746              {              {
5747              *errorcodeptr = ERR39;              *errorcodeptr = ERR39;
# Line 5509  for (;; ptr++) Line 5858  for (;; ptr++)
5858                if (crc < 0)                if (crc < 0)
5859                  {                  {
5860                  memmove(slot + cd->name_entry_size, slot,                  memmove(slot + cd->name_entry_size, slot,
5861                    (cd->names_found - i) * cd->name_entry_size);                    IN_UCHARS((cd->names_found - i) * cd->name_entry_size));
5862                  break;                  break;
5863                  }                  }
5864    
# Line 5523  for (;; ptr++) Line 5872  for (;; ptr++)
5872    
5873              if (!dupname)              if (!dupname)
5874                {                {
5875                uschar *cslot = cd->name_table;                pcre_uchar *cslot = cd->name_table;
5876                for (i = 0; i < cd->names_found; i++)                for (i = 0; i < cd->names_found; i++)
5877                  {                  {
5878                  if (cslot != slot)                  if (cslot != slot)
# Line 5540  for (;; ptr++) Line 5889  for (;; ptr++)
5889                }                }
5890    
5891              PUT2(slot, 0, cd->bracount + 1);              PUT2(slot, 0, cd->bracount + 1);
5892              memcpy(slot + 2, name, namelen);              memcpy(slot + 2, name, IN_UCHARS(namelen));
5893              slot[2+namelen] = 0;              slot[2 + namelen] = 0;
5894              }              }
5895            }            }
5896    
# Line 5579  for (;; ptr++) Line 5928  for (;; ptr++)
5928    
5929          if (lengthptr != NULL)          if (lengthptr != NULL)
5930            {            {
5931            const uschar *temp;            const pcre_uchar *temp;
5932    
5933            if (namelen == 0)            if (namelen == 0)
5934              {              {
# Line 5624  for (;; ptr++) Line 5973  for (;; ptr++)
5973            slot = cd->name_table;            slot = cd->name_table;
5974            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
5975              {              {
5976              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&              if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&
5977                  slot[2+namelen] == 0)                  slot[2+namelen] == 0)
5978                break;                break;
5979              slot += cd->name_entry_size;              slot += cd->name_entry_size;
# Line 5661  for (;; ptr++) Line 6010  for (;; ptr++)
6010          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
6011          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
6012            {            {
6013            const uschar *called;            const pcre_uchar *called;
6014            terminator = CHAR_RIGHT_PARENTHESIS;            terminator = CHAR_RIGHT_PARENTHESIS;
6015    
6016            /* Come here from the \g<...> and \g'...' code (Oniguruma            /* Come here from the \g<...> and \g'...' code (Oniguruma
# Line 5675  for (;; ptr++) Line 6024  for (;; ptr++)
6024            if ((refsign = *ptr) == CHAR_PLUS)            if ((refsign = *ptr) == CHAR_PLUS)
6025              {              {
6026              ptr++;              ptr++;
6027              if ((digitab[*ptr] & ctype_digit) == 0)              if (!IS_DIGIT(*ptr))
6028                {                {
6029                *errorcodeptr = ERR63;                *errorcodeptr = ERR63;
6030                goto FAILED;                goto FAILED;
# Line 5683  for (;; ptr++) Line 6032  for (;; ptr++)
6032              }              }
6033            else if (refsign == CHAR_MINUS)            else if (refsign == CHAR_MINUS)
6034              {              {
6035              if ((digitab[ptr[1]] & ctype_digit) == 0)              if (!IS_DIGIT(ptr[1]))
6036                goto OTHER_CHAR_AFTER_QUERY;                goto OTHER_CHAR_AFTER_QUERY;
6037              ptr++;              ptr++;
6038              }              }
6039    
6040            recno = 0;            recno = 0;
6041            while((digitab[*ptr] & ctype_digit) != 0)            while(IS_DIGIT(*ptr))
6042              recno = recno * 10 + *ptr++ - CHAR_0;              recno = recno * 10 + *ptr++ - CHAR_0;
6043    
6044            if (*ptr != terminator)            if (*ptr != terminator)
# Line 5740  for (;; ptr++) Line 6089  for (;; ptr++)
6089              {              {
6090              *code = OP_END;              *code = OP_END;
6091              if (recno != 0)              if (recno != 0)
6092                called = _pcre_find_bracket(cd->start_code, utf8, recno);                called = PRIV(find_bracket)(cd->start_code, utf8, recno);
6093    
6094              /* Forward reference */              /* Forward reference */
6095    
# Line 5787  for (;; ptr++) Line 6136  for (;; ptr++)
6136    
6137          /* Can't determine a first byte now */          /* Can't determine a first byte now */
6138    
6139          if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
6140          continue;          continue;
6141    
6142    
# Line 5844  for (;; ptr++) Line 6193  for (;; ptr++)
6193          both phases.          both phases.
6194    
6195          If we are not at the pattern start, reset the greedy defaults and the          If we are not at the pattern start, reset the greedy defaults and the
6196          case value for firstbyte and reqbyte. */          case value for firstchar and reqchar. */
6197    
6198          if (*ptr == CHAR_RIGHT_PARENTHESIS)          if (*ptr == CHAR_RIGHT_PARENTHESIS)
6199            {            {
# Line 5857  for (;; ptr++) Line 6206  for (;; ptr++)
6206              {              {
6207              greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);              greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
6208              greedy_non_default = greedy_default ^ 1;              greedy_non_default = greedy_default ^ 1;
6209              req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;              req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
6210              }              }
6211    
6212            /* Change options at this level, and pass them back for use            /* Change options at this level, and pass them back for use
# Line 5894  for (;; ptr++) Line 6243  for (;; ptr++)
6243        NUMBERED_GROUP:        NUMBERED_GROUP:
6244        cd->bracount += 1;        cd->bracount += 1;
6245        PUT2(code, 1+LINK_SIZE, cd->bracount);        PUT2(code, 1+LINK_SIZE, cd->bracount);
6246        skipbytes = 2;        skipbytes = IMM2_SIZE;
6247        }        }
6248    
6249      /* Process nested bracketed regex. Assertions used not to be repeatable,      /* Process nested bracketed regex. Assertions used not to be repeatable,
# Line 5906  for (;; ptr++) Line 6255  for (;; ptr++)
6255      *code = bravalue;      *code = bravalue;
6256      tempcode = code;      tempcode = code;
6257      tempreqvary = cd->req_varyopt;        /* Save value before bracket */      tempreqvary = cd->req_varyopt;        /* Save value before bracket */
6258        tempbracount = cd->bracount;          /* Save value before bracket */
6259      length_prevgroup = 0;                 /* Initialize for pre-compile phase */      length_prevgroup = 0;                 /* Initialize for pre-compile phase */
6260    
6261      if (!compile_regex(      if (!compile_regex(
# Line 5919  for (;; ptr++) Line 6269  for (;; ptr++)
6269           skipbytes,                       /* Skip over bracket number */           skipbytes,                       /* Skip over bracket number */
6270           cond_depth +           cond_depth +
6271             ((bravalue == OP_COND)?1:0),   /* Depth of condition subpatterns */             ((bravalue == OP_COND)?1:0),   /* Depth of condition subpatterns */
6272           &subfirstbyte,                   /* For possible first char */           &subfirstchar,                   /* For possible first char */
6273           &subreqbyte,                     /* For possible last char */           &subreqchar,                     /* For possible last char */
6274           bcptr,                           /* Current branch chain */           bcptr,                           /* Current branch chain */
6275           cd,                              /* Tables block */           cd,                              /* Tables block */
6276           (lengthptr == NULL)? NULL :      /* Actual compile phase */           (lengthptr == NULL)? NULL :      /* Actual compile phase */
# Line 5928  for (;; ptr++) Line 6278  for (;; ptr++)
6278           ))           ))
6279        goto FAILED;        goto FAILED;
6280    
6281        /* If this was an atomic group and there are no capturing groups within it,
6282        generate OP_ONCE_NC instead of OP_ONCE. */
6283    
6284        if (bravalue == OP_ONCE && cd->bracount <= tempbracount)
6285          *code = OP_ONCE_NC;
6286    
6287      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
6288        cd->assert_depth -= 1;        cd->assert_depth -= 1;
6289    
6290      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
6291      group, while tempcode has been updated to point past the end of the group      group, while tempcode has been updated to point past the end of the group.
6292      and any option resetting that may follow it. The pattern pointer (ptr)      The pattern pointer (ptr) is on the bracket.
     is on the bracket. */  
6293    
6294      /* If this is a conditional bracket, check that there are no more than      If this is a conditional bracket, check that there are no more than
6295      two branches in the group, or just one if it's a DEFINE group. We do this      two branches in the group, or just one if it's a DEFINE group. We do this
6296      in the real compile phase, not in the pre-pass, where the whole group may      in the real compile phase, not in the pre-pass, where the whole group may
6297      not be available. */      not be available. */
6298    
6299      if (bravalue == OP_COND && lengthptr == NULL)      if (bravalue == OP_COND && lengthptr == NULL)
6300        {        {
6301        uschar *tc = code;        pcre_uchar *tc = code;
6302        int condcount = 0;        int condcount = 0;
6303    
6304        do {        do {
# Line 5966  for (;; ptr++) Line 6321  for (;; ptr++)
6321          }          }
6322    
6323        /* A "normal" conditional group. If there is just one branch, we must not        /* A "normal" conditional group. If there is just one branch, we must not
6324        make use of its firstbyte or reqbyte, because this is equivalent to an        make use of its firstchar or reqchar, because this is equivalent to an
6325        empty second branch. */        empty second branch. */
6326    
6327        else        else
# Line 5976  for (;; ptr++) Line 6331  for (;; ptr++)
6331            *errorcodeptr = ERR27;            *errorcodeptr = ERR27;
6332            goto FAILED;            goto FAILED;
6333            }            }
6334          if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE;          if (condcount == 1) subfirstchar = subreqchar = REQ_NONE;
6335          }          }
6336        }        }
6337    
# Line 6020  for (;; ptr++) Line 6375  for (;; ptr++)
6375      /* Handle updating of the required and first characters for other types of      /* Handle updating of the required and first characters for other types of
6376      group. Update for normal brackets of all kinds, and conditions with two      group. Update for normal brackets of all kinds, and conditions with two
6377      branches (see code above). If the bracket is followed by a quantifier with      branches (see code above). If the bracket is followed by a quantifier with
6378      zero repeat, we have to back off. Hence the definition of zeroreqbyte and      zero repeat, we have to back off. Hence the definition of zeroreqchar and
6379      zerofirstbyte outside the main loop so that they can be accessed for the      zerofirstchar outside the main loop so that they can be accessed for the
6380      back off. */      back off. */
6381    
6382      zeroreqbyte = reqbyte;      zeroreqchar = reqchar;
6383      zerofirstbyte = firstbyte;      zerofirstchar = firstchar;
6384      groupsetfirstbyte = FALSE;      groupsetfirstchar = FALSE;
6385    
6386      if (bravalue >= OP_ONCE)      if (bravalue >= OP_ONCE)
6387        {        {
6388        /* If we have not yet set a firstbyte in this branch, take it from the        /* If we have not yet set a firstchar in this branch, take it from the
6389        subpattern, remembering that it was set here so that a repeat of more        subpattern, remembering that it was set here so that a repeat of more
6390        than one can replicate it as reqbyte if necessary. If the subpattern has        than one can replicate it as reqchar if necessary. If the subpattern has
6391        no firstbyte, set "none" for the whole branch. In both cases, a zero        no firstchar, set "none" for the whole branch. In both cases, a zero
6392        repeat forces firstbyte to "none". */        repeat forces firstchar to "none". */
6393    
6394        if (firstbyte == REQ_UNSET)        if (firstchar == REQ_UNSET)
6395          {          {
6396          if (subfirstbyte >= 0)          if (subfirstchar >= 0)
6397            {            {
6398            firstbyte = subfirstbyte;            firstchar = subfirstchar;
6399            groupsetfirstbyte = TRUE;            groupsetfirstchar = TRUE;
6400            }            }
6401          else firstbyte = REQ_NONE;          else firstchar = REQ_NONE;
6402          zerofirstbyte = REQ_NONE;          zerofirstchar = REQ_NONE;
6403          }          }
6404    
6405        /* If firstbyte was previously set, convert the subpattern's firstbyte        /* If firstchar was previously set, convert the subpattern's firstchar
6406        into reqbyte if there wasn't one, using the vary flag that was in        into reqchar if there wasn't one, using the vary flag that was in
6407        existence beforehand. */        existence beforehand. */
6408    
6409        else if (subfirstbyte >= 0 && subreqbyte < 0)        else if (subfirstchar >= 0 && subreqchar < 0)
6410          subreqbyte = subfirstbyte | tempreqvary;          subreqchar = subfirstchar | tempreqvary;
6411    
6412        /* If the subpattern set a required byte (or set a first byte that isn't        /* If the subpattern set a required byte (or set a first byte that isn't
6413        really the first byte - see above), set it. */        really the first byte - see above), set it. */
6414    
6415        if (subreqbyte >= 0) reqbyte = subreqbyte;        if (subreqchar >= 0) reqchar = subreqchar;
6416        }        }
6417    
6418      /* For a forward assertion, we take the reqbyte, if set. This can be      /* For a forward assertion, we take the reqchar, if set. This can be
6419      helpful if the pattern that follows the assertion doesn't set a different      helpful if the pattern that follows the assertion doesn't set a different
6420      char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte      char. For example, it's useful for /(?=abcde).+/. We can't set firstchar
6421      for an assertion, however because it leads to incorrect effect for patterns      for an assertion, however because it leads to incorrect effect for patterns
6422      such as /(?=a)a.+/ when the "real" "a" would then become a reqbyte instead      such as /(?=a)a.+/ when the "real" "a" would then become a reqchar instead
6423      of a firstbyte. This is overcome by a scan at the end if there's no      of a firstchar. This is overcome by a scan at the end if there's no
6424      firstbyte, looking for an asserted first char. */      firstchar, looking for an asserted first char. */
6425    
6426      else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte;      else if (bravalue == OP_ASSERT && subreqchar >= 0) reqchar = subreqchar;
6427      break;     /* End of processing '(' */      break;     /* End of processing '(' */
6428    
6429    
# Line 6101  for (;; ptr++) Line 6456  for (;; ptr++)
6456        /* For metasequences that actually match a character, we disable the        /* For metasequences that actually match a character, we disable the
6457        setting of a first character if it hasn't already been set. */        setting of a first character if it hasn't already been set. */
6458    
6459        if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z)        if (firstchar == REQ_UNSET && -c > ESC_b && -c < ESC_Z)
6460          firstbyte = REQ_NONE;          firstchar = REQ_NONE;
6461    
6462        /* Set values to reset to if this is followed by a zero repeat. */        /* Set values to reset to if this is followed by a zero repeat. */
6463    
6464        zerofirstbyte = firstbyte;        zerofirstchar = firstchar;
6465        zeroreqbyte = reqbyte;        zeroreqchar = reqchar;
6466    
6467        /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'        /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
6468        is a subroutine call by number (Oniguruma syntax). In fact, the value        is a subroutine call by number (Oniguruma syntax). In fact, the value
# Line 6118  for (;; ptr++) Line 6473  for (;; ptr++)
6473    
6474        if (-c == ESC_g)        if (-c == ESC_g)
6475          {          {
6476          const uschar *p;          const pcre_uchar *p;
6477          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
6478          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
6479            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
# Line 6158  for (;; ptr++) Line 6513  for (;; ptr++)
6513          /* Test a signed number in angle brackets or quotes. */          /* Test a signed number in angle brackets or quotes. */
6514    
6515          p = ptr + 2;          p = ptr + 2;
6516          while ((digitab[*p] & ctype_digit) != 0) p++;          while (IS_DIGIT(*p)) p++;
6517          if (*p != terminator)          if (*p != terminator)
6518            {            {
6519            *errorcodeptr = ERR57;            *errorcodeptr = ERR57;
# Line 6186  for (;; ptr++) Line 6541  for (;; ptr++)
6541          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
6542          }          }
6543    
6544        /* Back references are handled specially; must disable firstbyte if        /* Back references are handled specially; must disable firstchar if
6545        not set to cope with cases like (?=(\w+))\1: which would otherwise set        not set to cope with cases like (?=(\w+))\1: which would otherwise set
6546        ':' later. */        ':' later. */
6547    
# Line 6196  for (;; ptr++) Line 6551  for (;; ptr++)
6551          recno = -c - ESC_REF;          recno = -c - ESC_REF;
6552    
6553          HANDLE_REFERENCE:    /* Come here from named backref handling */          HANDLE_REFERENCE:    /* Come here from named backref handling */
6554          if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
6555          previous = code;          previous = code;
6556          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
6557          PUT2INC(code, 0, recno);          PUT2INC(code, 0, recno);
# Line 6258  for (;; ptr++) Line 6613  for (;; ptr++)
6613            }            }
6614          else          else
6615  #endif  #endif
6616            {          /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE
6617            so that it works in DFA mode and in lookbehinds. */
6618    
6619              {
6620            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
6621            *code++ = -c;            *code++ = (!utf8 && c == -ESC_C)? OP_ALLANY : -c;
6622            }            }
6623          }          }
6624        continue;        continue;
# Line 6272  for (;; ptr++) Line 6630  for (;; ptr++)
6630    
6631  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6632      if (utf8 && c > 127)      if (utf8 && c > 127)
6633        mclength = _pcre_ord2utf8(c, mcbuffer);        mclength = PRIV(ord2utf8)(c, mcbuffer);
6634      else      else
6635  #endif  #endif
6636    
# Line 6316  for (;; ptr++) Line 6674  for (;; ptr++)
6674    
6675      /* Set the first and required bytes appropriately. If no previous first      /* Set the first and required bytes appropriately. If no previous first
6676      byte, set it from this character, but revert to none on a zero repeat.      byte, set it from this character, but revert to none on a zero repeat.
6677      Otherwise, leave the firstbyte value alone, and don't change it on a zero      Otherwise, leave the firstchar value alone, and don't change it on a zero
6678      repeat. */      repeat. */
6679    
6680      if (firstbyte == REQ_UNSET)      if (firstchar == REQ_UNSET)
6681        {        {
6682        zerofirstbyte = REQ_NONE;        zerofirstchar = REQ_NONE;
6683        zeroreqbyte = reqbyte;        zeroreqchar = reqchar;
6684    
6685        /* If the character is more than one byte long, we can set firstbyte        /* If the character is more than one byte long, we can set firstchar
6686        only if it is not to be matched caselessly. */        only if it is not to be matched caselessly. */
6687    
6688        if (mclength == 1 || req_caseopt == 0)        if (mclength == 1 || req_caseopt == 0)
6689          {          {
6690          firstbyte = mcbuffer[0] | req_caseopt;          firstchar = mcbuffer[0] | req_caseopt;
6691          if (mclength != 1) reqbyte = code[-1] | cd->req_varyopt;          if (mclength != 1) reqchar = code[-1] | cd->req_varyopt;
6692          }          }
6693        else firstbyte = reqbyte = REQ_NONE;        else firstchar = reqchar = REQ_NONE;
6694        }        }
6695    
6696      /* firstbyte was previously set; we can set reqbyte only if the length is      /* firstchar was previously set; we can set reqchar only if the length is
6697      1 or the matching is caseful. */      1 or the matching is caseful. */
6698    
6699      else      else
6700        {        {
6701        zerofirstbyte = firstbyte;        zerofirstchar = firstchar;
6702        zeroreqbyte = reqbyte;        zeroreqchar = reqchar;
6703        if (mclength == 1 || req_caseopt == 0)        if (mclength == 1 || req_caseopt == 0)
6704          reqbyte = code[-1] | req_caseopt | cd->req_varyopt;          reqchar = code[-1] | req_caseopt | cd->req_varyopt;
6705        }        }
6706    
6707      break;            /* End of literal character handling */      break;            /* End of literal character handling */
# Line 6383  Arguments: Line 6741  Arguments:
6741    reset_bracount TRUE to reset the count for each branch    reset_bracount TRUE to reset the count for each branch
6742    skipbytes      skip this many bytes at start (for brackets and OP_COND)    skipbytes      skip this many bytes at start (for brackets and OP_COND)
6743    cond_depth     depth of nesting for conditional subpatterns    cond_depth     depth of nesting for conditional subpatterns
6744    firstbyteptr   place to put the first required character, or a negative number    firstcharptr   place to put the first required character, or a negative number
6745    reqbyteptr     place to put the last required character, or a negative number    reqcharptr     place to put the last required character, or a negative number
6746    bcptr          pointer to the chain of currently open branches    bcptr          pointer to the chain of currently open branches
6747    cd             points to the data block with tables pointers etc.    cd             points to the data block with tables pointers etc.
6748    lengthptr      NULL during the real compile phase    lengthptr      NULL during the real compile phase
# Line 6394  Returns:         TRUE on success Line 6752  Returns:         TRUE on success
6752  */  */
6753    
6754  static BOOL  static BOOL
6755  compile_regex(int options, uschar **codeptr, const uschar **ptrptr,  compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
6756    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
6757    int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    int cond_depth, pcre_int32 *firstcharptr, pcre_int32 *reqcharptr,
6758    compile_data *cd, int *lengthptr)    branch_chain *bcptr, compile_data *cd, int *lengthptr)
6759  {  {
6760  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
6761  uschar *code = *codeptr;  pcre_uchar *code = *codeptr;
6762  uschar *last_branch = code;  pcre_uchar *last_branch = code;
6763  uschar *start_bracket = code;  pcre_uchar *start_bracket = code;
6764  uschar *reverse_count = NULL;  pcre_uchar *reverse_count = NULL;
6765  open_capitem capitem;  open_capitem capitem;
6766  int capnumber = 0;  int capnumber = 0;
6767  int firstbyte, reqbyte;  pcre_int32 firstchar, reqchar;
6768  int branchfirstbyte, branchreqbyte;  pcre_int32 branchfirstchar, branchreqchar;
6769  int length;  int length;
6770  int orig_bracount;  int orig_bracount;
6771  int max_bracount;  int max_bracount;
# Line 6416  branch_chain bc; Line 6774  branch_chain bc;
6774  bc.outer = bcptr;  bc.outer = bcptr;
6775  bc.current_branch = code;  bc.current_branch = code;
6776    
6777  firstbyte = reqbyte = REQ_UNSET;  firstchar = reqchar = REQ_UNSET;
6778    
6779  /* Accumulate the length for use in the pre-compile phase. Start with the  /* Accumulate the length for use in the pre-compile phase. Start with the
6780  length of the BRA and KET and any extra bytes that are required at the  length of the BRA and KET and any extra bytes that are required at the
# Line 6475  for (;;) Line 6833  for (;;)
6833    /* Now compile the branch; in the pre-compile phase its length gets added    /* Now compile the branch; in the pre-compile phase its length gets added
6834    into the length. */    into the length. */
6835    
6836    if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,    if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,
6837          &branchreqbyte, &bc, cond_depth, cd,          &branchreqchar, &bc, cond_depth, cd,
6838          (lengthptr == NULL)? NULL : &length))          (lengthptr == NULL)? NULL : &length))
6839      {      {
6840      *ptrptr = ptr;      *ptrptr = ptr;
# Line 6492  for (;;) Line 6850  for (;;)
6850    
6851    if (lengthptr == NULL)    if (lengthptr == NULL)
6852      {      {
6853      /* If this is the first branch, the firstbyte and reqbyte values for the      /* If this is the first branch, the firstchar and reqchar values for the
6854      branch become the values for the regex. */      branch become the values for the regex. */
6855    
6856      if (*last_branch != OP_ALT)      if (*last_branch != OP_ALT)
6857        {        {
6858        firstbyte = branchfirstbyte;        firstchar = branchfirstchar;
6859        reqbyte = branchreqbyte;        reqchar = branchreqchar;
6860        }        }
6861    
6862      /* If this is not the first branch, the first char and reqbyte have to      /* If this is not the first branch, the first char and reqchar have to
6863      match the values from all the previous branches, except that if the      match the values from all the previous branches, except that if the
6864      previous value for reqbyte didn't have REQ_VARY set, it can still match,      previous value for reqchar didn't have REQ_VARY set, it can still match,
6865      and we set REQ_VARY for the regex. */      and we set REQ_VARY for the regex. */
6866    
6867      else      else
6868        {        {
6869        /* If we previously had a firstbyte, but it doesn't match the new branch,        /* If we previously had a firstchar, but it doesn't match the new branch,
6870        we have to abandon the firstbyte for the regex, but if there was        we have to abandon the firstchar for the regex, but if there was
6871        previously no reqbyte, it takes on the value of the old firstbyte. */        previously no reqchar, it takes on the value of the old firstchar. */
6872    
6873        if (firstbyte >= 0 && firstbyte != branchfirstbyte)        if (firstchar >= 0 && firstchar != branchfirstchar)
6874          {          {
6875          if (reqbyte < 0) reqbyte = firstbyte;          if (reqchar < 0) reqchar = firstchar;
6876          firstbyte = REQ_NONE;          firstchar = REQ_NONE;
6877          }          }
6878    
6879        /* If we (now or from before) have no firstbyte, a firstbyte from the        /* If we (now or from before) have no firstchar, a firstchar from the
6880        branch becomes a reqbyte if there isn't a branch reqbyte. */        branch becomes a reqchar if there isn't a branch reqchar. */
6881    
6882        if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)        if (firstchar < 0 && branchfirstchar >= 0 && branchreqchar < 0)
6883            branchreqbyte = branchfirstbyte;            branchreqchar = branchfirstchar;
6884    
6885        /* Now ensure that the reqbytes match */        /* Now ensure that the reqchars match */
6886    
6887        if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))        if ((reqchar & ~REQ_VARY) != (branchreqchar & ~REQ_VARY))
6888          reqbyte = REQ_NONE;          reqchar = REQ_NONE;
6889        else reqbyte |= branchreqbyte;   /* To "or" REQ_VARY */        else reqchar |= branchreqchar;   /* To "or" REQ_VARY */
6890        }        }
6891    
6892      /* If lookbehind, check that this branch matches a fixed-length string, and      /* If lookbehind, check that this branch matches a fixed-length string, and
# Line 6552  for (;;) Line 6910  for (;;)
6910          }          }
6911        else if (fixed_length < 0)        else if (fixed_length < 0)
6912          {          {
6913          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;          *errorcodeptr = (fixed_length == -2)? ERR36 :
6914                            (fixed_length == -4)? ERR70: ERR25;
6915          *ptrptr = ptr;          *ptrptr = ptr;
6916          return FALSE;          return FALSE;
6917          }          }
# Line 6597  for (;;) Line 6956  for (;;)
6956        if (cd->open_caps->flag)        if (cd->open_caps->flag)
6957          {          {
6958          memmove(start_bracket + 1 + LINK_SIZE, start_bracket,          memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
6959            code - start_bracket);            IN_UCHARS(code - start_bracket));
6960          *start_bracket = OP_ONCE;          *start_bracket = OP_ONCE;
6961          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
6962          PUT(start_bracket, 1, (int)(code - start_bracket));          PUT(start_bracket, 1, (int)(code - start_bracket));
# Line 6617  for (;;) Line 6976  for (;;)
6976    
6977      *codeptr = code;      *codeptr = code;
6978      *ptrptr = ptr;      *ptrptr = ptr;
6979      *firstbyteptr = firstbyte;      *firstcharptr = firstchar;
6980      *reqbyteptr = reqbyte;      *reqcharptr = reqchar;
6981      if (lengthptr != NULL)      if (lengthptr != NULL)
6982        {        {
6983        if (OFLOW_MAX - *lengthptr < length)        if (OFLOW_MAX - *lengthptr < length)
# Line 6699  Returns:     TRUE or FALSE Line 7058  Returns:     TRUE or FALSE
7058  */  */
7059    
7060  static BOOL  static BOOL
7061  is_anchored(register const uschar *code, unsigned int bracket_map,  is_anchored(register const pcre_uchar *code, unsigned int bracket_map,
7062    unsigned int backref_map)    unsigned int backref_map)
7063  {  {
7064  do {  do {
7065     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
7066       FALSE);       code + PRIV(OP_lengths)[*code], FALSE);
7067     register int op = *scode;     register int op = *scode;
7068    
7069     /* Non-capturing brackets */     /* Non-capturing brackets */
# Line 6727  do { Line 7086  do {
7086    
7087     /* Other brackets */     /* Other brackets */
7088    
7089     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC ||
7090                op == OP_COND)
7091       {       {
7092       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;
7093       }       }
# Line 6775  Returns:         TRUE or FALSE Line 7135  Returns:         TRUE or FALSE
7135  */  */
7136    
7137  static BOOL  static BOOL
7138  is_startline(const uschar *code, unsigned int bracket_map,  is_startline(const pcre_uchar *code, unsigned int bracket_map,
7139    unsigned int backref_map)    unsigned int backref_map)
7140  {  {
7141  do {  do {
7142     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
7143       FALSE);       code + PRIV(OP_lengths)[*code], FALSE);
7144     register int op = *scode;     register int op = *scode;
7145    
7146     /* If we are at the start of a conditional assertion group, *both* the     /* If we are at the start of a conditional assertion group, *both* the
# Line 6791  do { Line 7151  do {
7151     if (op == OP_COND)     if (op == OP_COND)
7152       {       {
7153       scode += 1 + LINK_SIZE;       scode += 1 + LINK_SIZE;
7154       if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];       if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
7155       switch (*scode)       switch (*scode)
7156         {         {
7157         case OP_CREF:         case OP_CREF:
# Line 6831  do { Line 7191  do {
7191    
7192     /* Other brackets */     /* Other brackets */
7193    
7194     else if (op == OP_ASSERT || op == OP_ONCE)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC)
7195       {       {
7196       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
7197       }       }
# Line 6878  Returns:     -1 or the fixed first char Line 7238  Returns:     -1 or the fixed first char
7238  */  */
7239    
7240  static int  static int
7241  find_firstassertedchar(const uschar *code, BOOL inassert)  find_firstassertedchar(const pcre_uchar *code, BOOL inassert)
7242  {  {
7243  register int c = -1;  register int c = -1;
7244  do {  do {
7245     int d;     int d;
7246     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
7247               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0;               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
7248     const uschar *scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);     const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
7249         TRUE);
7250     register int op = *scode;     register int op = *scode;
7251    
7252     switch(op)     switch(op)
# Line 6901  do { Line 7262  do {
7262       case OP_SCBRAPOS:       case OP_SCBRAPOS:
7263       case OP_ASSERT:       case OP_ASSERT:
7264       case OP_ONCE:       case OP_ONCE:
7265         case OP_ONCE_NC:
7266       case OP_COND:       case OP_COND:
7267       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)
7268         return -1;         return -1;
# Line 6908  do { Line 7270  do {
7270       break;       break;
7271    
7272       case OP_EXACT:       case OP_EXACT:
7273       scode += 2;       scode += IMM2_SIZE;
7274       /* Fall through */       /* Fall through */
7275    
7276       case OP_CHAR:       case OP_CHAR:
# Line 6921  do { Line 7283  do {
7283       break;       break;
7284    
7285       case OP_EXACTI:       case OP_EXACTI:
7286       scode += 2;       scode += IMM2_SIZE;
7287       /* Fall through */       /* Fall through */
7288    
7289       case OP_CHARI:       case OP_CHARI:
# Line 6964  Returns:        pointer to compiled data Line 7326  Returns:        pointer to compiled data
7326                  with errorptr and erroroffset set                  with errorptr and erroroffset set
7327  */  */
7328    
7329    #ifdef COMPILE_PCRE8
7330  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7331  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
7332    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
7333    #else
7334    PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7335    pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
7336      int *erroroffset, const unsigned char *tables)
7337    #endif
7338  {  {
7339    #ifdef COMPILE_PCRE8
7340  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7341    #else
7342    return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7343    #endif
7344  }  }
7345    
7346    
7347    #ifdef COMPILE_PCRE8
7348  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7349  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
7350    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
7351    #else
7352    PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7353    pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
7354      const char **errorptr, int *erroroffset, const unsigned char *tables)
7355    #endif
7356  {  {
7357  real_pcre *re;  real_pcre *re;
7358  int length = 1;  /* For final END opcode */  int length = 1;  /* For final END opcode */
7359  int firstbyte, reqbyte, newline;  pcre_int32 firstchar, reqchar;
7360    int newline;
7361  int errorcode = 0;  int errorcode = 0;
7362  int skipatstart = 0;  int skipatstart = 0;
7363  BOOL utf8;  BOOL utf8;
7364  size_t size;  size_t size;
7365  uschar *code;  pcre_uchar *code;
7366  const uschar *codestart;  const pcre_uchar *codestart;
7367  const uschar *ptr;  const pcre_uchar *ptr;
7368  compile_data compile_block;  compile_data compile_block;
7369  compile_data *cd = &compile_block;  compile_data *cd = &compile_block;
7370    
# Line 6995  as soon as possible, so that a fairly la Line 7374  as soon as possible, so that a fairly la
7374  this purpose. The same space is used in the second phase for remembering where  this purpose. The same space is used in the second phase for remembering where
7375  to fill in forward references to subpatterns. */  to fill in forward references to subpatterns. */
7376    
7377  uschar cworkspace[COMPILE_WORK_SIZE];  pcre_uchar cworkspace[COMPILE_WORK_SIZE];
7378    
7379  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
7380    
7381  ptr = (const uschar *)pattern;  ptr = (const pcre_uchar *)pattern;
7382    
7383  /* We can't pass back an error message if errorptr is NULL; I guess the best we  /* We can't pass back an error message if errorptr is NULL; I guess the best we
7384  can do is just return NULL, but we can set a code value if there is a code  can do is just return NULL, but we can set a code value if there is a code
# Line 7026  if (erroroffset == NULL) Line 7405  if (erroroffset == NULL)
7405    
7406  /* Set up pointers to the individual character tables */  /* Set up pointers to the individual character tables */
7407    
7408  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = PRIV(default_tables);
7409  cd->lcc = tables + lcc_offset;  cd->lcc = tables + lcc_offset;
7410  cd->fcc = tables + fcc_offset;  cd->fcc = tables + fcc_offset;
7411  cd->cbits = tables + cbits_offset;  cd->cbits = tables + cbits_offset;
# Line 7049  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7428  while (ptr[skipatstart] == CHAR_LEFT_PAR
7428    int newnl = 0;    int newnl = 0;
7429    int newbsr = 0;    int newbsr = 0;
7430    
7431    if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)
7432      { skipatstart += 7; options |= PCRE_UTF8; continue; }      { skipatstart += 7; options |= PCRE_UTF8; continue; }
7433    else if (strncmp((char *)(ptr+skipatstart+2), STRING_UCP_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
7434      { skipatstart += 6; options |= PCRE_UCP; continue; }      { skipatstart += 6; options |= PCRE_UCP; continue; }
7435    else if (strncmp((char *)(ptr+skipatstart+2), STRING_NO_START_OPT_RIGHTPAR, 13) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
7436      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
7437    
7438    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
7439      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
7440    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)
7441      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
7442    else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5)  == 0)
7443      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
7444    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0)
7445      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
7446    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0)
7447      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
7448    
7449    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
7450      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
7451    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
7452      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
7453    
7454    if (newnl != 0)    if (newnl != 0)
# Line 7082  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7461  while (ptr[skipatstart] == CHAR_LEFT_PAR
7461  utf8 = (options & PCRE_UTF8) != 0;  utf8 = (options & PCRE_UTF8) != 0;
7462    
7463  /* Can't support UTF8 unless PCRE has been compiled to include the code. The  /* Can't support UTF8 unless PCRE has been compiled to include the code. The
7464  return of an error code from _pcre_valid_utf8() is a new feature, introduced in  return of an error code from PRIV(valid_utf8)() is a new feature, introduced in
7465  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
7466  not used here. */  not used here. */
7467    
7468  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
7469  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7470       (errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0)       (errorcode = PRIV(valid_utf8)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
7471    {    {
7472    errorcode = ERR44;    errorcode = ERR44;
7473    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
# Line 7170  cd->backref_map = 0; Line 7549  cd->backref_map = 0;
7549  /* Reflect pattern for debugging output */  /* Reflect pattern for debugging output */
7550    
7551  DPRINTF(("------------------------------------------------------------------\n"));  DPRINTF(("------------------------------------------------------------------\n"));
7552  DPRINTF(("%s\n", pattern));  #ifdef PCRE_DEBUG
7553    print_puchar(stdout, (PCRE_PUCHAR)pattern);
7554    #endif
7555    DPRINTF(("\n"));
7556    
7557  /* Pretend to compile the pattern while actually just accumulating the length  /* Pretend to compile the pattern while actually just accumulating the length
7558  of memory required. This behaviour is triggered by passing a non-NULL final  of memory required. This behaviour is triggered by passing a non-NULL final
# Line 7186  cd->name_table = NULL; Line 7568  cd->name_table = NULL;
7568  cd->start_workspace = cworkspace;  cd->start_workspace = cworkspace;
7569  cd->start_code = cworkspace;  cd->start_code = cworkspace;
7570  cd->hwm = cworkspace;  cd->hwm = cworkspace;
7571  cd->start_pattern = (const uschar *)pattern;  cd->start_pattern = (const pcre_uchar *)pattern;
7572  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
7573  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7574  cd->external_options = options;  cd->external_options = options;
7575  cd->external_flags = 0;  cd->external_flags = 0;
# Line 7203  ptr += skipatstart; Line 7585  ptr += skipatstart;
7585  code = cworkspace;  code = cworkspace;
7586  *code = OP_BRA;  *code = OP_BRA;
7587  (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,  (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
7588    FALSE, 0, 0, &firstbyte, &reqbyte, NULL, cd, &length);    FALSE, 0, 0, &firstchar, &reqchar, NULL, cd, &length);
7589  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
7590    
7591  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
7592    cd->hwm - cworkspace));    (int)(cd->hwm - cworkspace)));
7593    
7594  if (length > MAX_PATTERN_SIZE)  if (length > MAX_PATTERN_SIZE)
7595    {    {
# Line 7220  externally provided function. Integer ov Line 7602  externally provided function. Integer ov
7602  because nowadays we limit the maximum value of cd->names_found and  because nowadays we limit the maximum value of cd->names_found and
7603  cd->name_entry_size. */  cd->name_entry_size. */
7604    
7605  size = length + sizeof(real_pcre) + cd->names_found * (cd->name_entry_size + 3);  size = sizeof(real_pcre) + (length + cd->names_found * (cd->name_entry_size + 3)) * sizeof(pcre_uchar);
7606  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
7607    
7608  if (re == NULL)  if (re == NULL)
# Line 7240  re->size = (int)size; Line 7622  re->size = (int)size;
7622  re->options = cd->external_options;  re->options = cd->external_options;
7623  re->flags = cd->external_flags;  re->flags = cd->external_flags;
7624  re->dummy1 = 0;  re->dummy1 = 0;
7625  re->first_byte = 0;  re->first_char = 0;
7626  re->req_byte = 0;  re->req_char = 0;
7627  re->name_table_offset = sizeof(real_pcre);  re->name_table_offset = sizeof(real_pcre) / sizeof(pcre_uchar);
7628  re->name_entry_size = cd->name_entry_size;  re->name_entry_size = cd->name_entry_size;
7629  re->name_count = cd->names_found;  re->name_count = cd->names_found;
7630  re->ref_count = 0;  re->ref_count = 0;
7631  re->tables = (tables == _pcre_default_tables)? NULL : tables;  re->tables = (tables == PRIV(default_tables))? NULL : tables;
7632  re->nullpad = NULL;  re->nullpad = NULL;
7633    
7634  /* The starting points of the name/number translation table and of the code are  /* The starting points of the name/number translation table and of the code are
# Line 7260  cd->final_bracount = cd->bracount;  /* S Line 7642  cd->final_bracount = cd->bracount;  /* S
7642  cd->assert_depth = 0;  cd->assert_depth = 0;
7643  cd->bracount = 0;  cd->bracount = 0;
7644  cd->names_found = 0;  cd->names_found = 0;
7645  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
7646  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
7647  cd->start_code = codestart;  cd->start_code = codestart;
7648  cd->hwm = cworkspace;  cd->hwm = cworkspace;
# Line 7273  cd->open_caps = NULL; Line 7655  cd->open_caps = NULL;
7655  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
7656  of the function here. */  of the function here. */
7657    
7658  ptr = (const uschar *)pattern + skipatstart;  ptr = (const pcre_uchar *)pattern + skipatstart;
7659  code = (uschar *)codestart;  code = (pcre_uchar *)codestart;
7660  *code = OP_BRA;  *code = OP_BRA;
7661  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
7662    &firstbyte, &reqbyte, NULL, cd, NULL);    &firstchar, &reqchar, NULL, cd, NULL);
7663  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
7664  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
7665  re->flags = cd->external_flags;  re->flags = cd->external_flags;
7666    
7667  if (cd->had_accept) reqbyte = REQ_NONE;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
7668    
7669  /* If not reached end of pattern on success, there's an excess bracket. */  /* If not reached end of pattern on success, there's an excess bracket. */
7670    
# Line 7302  if (code - codestart > length) errorcode Line 7684  if (code - codestart > length) errorcode
7684  while (errorcode == 0 && cd->hwm > cworkspace)  while (errorcode == 0 && cd->hwm > cworkspace)
7685    {    {
7686    int offset, recno;    int offset, recno;
7687    const uschar *groupptr;    const pcre_uchar *groupptr;
7688    cd->hwm -= LINK_SIZE;    cd->hwm -= LINK_SIZE;
7689    offset = GET(cd->hwm, 0);    offset = GET(cd->hwm, 0);
7690    recno = GET(codestart, offset);    recno = GET(codestart, offset);
7691    groupptr = _pcre_find_bracket(codestart, utf8, recno);    groupptr = PRIV(find_bracket)(codestart, utf8, recno);
7692    if (groupptr == NULL) errorcode = ERR53;    if (groupptr == NULL) errorcode = ERR53;
7693      else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));      else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));
7694    }    }
7695    
7696  /* Give an error if there's back reference to a non-existent capturing  /* Give an error if there's back reference to a non-existent capturing
# Line 7326  length, and set their lengths. */ Line 7708  length, and set their lengths. */
7708    
7709  if (cd->check_lookbehind)  if (cd->check_lookbehind)
7710    {    {
7711    uschar *cc = (uschar *)codestart;    pcre_uchar *cc = (pcre_uchar *)codestart;
7712    
7713    /* Loop, searching for OP_REVERSE items, and process those that do not have    /* Loop, searching for OP_REVERSE items, and process those that do not have
7714    their length set. (Actually, it will also re-process any that have a length    their length set. (Actually, it will also re-process any that have a length
7715    of zero, but that is a pathological case, and it does no harm.) When we find    of zero, but that is a pathological case, and it does no harm.) When we find
7716    one, we temporarily terminate the branch it is in while we scan it. */    one, we temporarily terminate the branch it is in while we scan it. */
7717    
7718    for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);    for (cc = (pcre_uchar *)PRIV(find_bracket)(codestart, utf8, -1);
7719         cc != NULL;         cc != NULL;
7720         cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))         cc = (pcre_uchar *)PRIV(find_bracket)(cc, utf8, -1))
7721      {      {
7722      if (GET(cc, 1) == 0)      if (GET(cc, 1) == 0)
7723        {        {
7724        int fixed_length;        int fixed_length;
7725        uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);        pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
7726        int end_op = *be;        int end_op = *be;
7727        *be = OP_END;        *be = OP_END;
7728        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
# Line 7349  if (cd->check_lookbehind) Line 7731  if (cd->check_lookbehind)
7731        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
7732        if (fixed_length < 0)        if (fixed_length < 0)
7733          {          {
7734          errorcode = (fixed_length == -2)? ERR36 : ERR25;          errorcode = (fixed_length == -2)? ERR36 :
7735                        (fixed_length == -4)? ERR70 : ERR25;
7736          break;          break;
7737          }          }
7738        PUT(cc, 1, fixed_length);        PUT(cc, 1, fixed_length);
# Line 7364  if (errorcode != 0) Line 7747  if (errorcode != 0)
7747    {    {
7748    (pcre_free)(re);    (pcre_free)(re);
7749    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
7750    *erroroffset = (int)(ptr - (const uschar *)pattern);    *erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
7751    PCRE_EARLY_ERROR_RETURN2:    PCRE_EARLY_ERROR_RETURN2:
7752    *errorptr = find_error_text(errorcode);    *errorptr = find_error_text(errorcode);
7753    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
# Line 7387  if ((re->options & PCRE_ANCHORED) == 0) Line 7770  if ((re->options & PCRE_ANCHORED) == 0)
7770      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
7771    else    else
7772      {      {
7773      if (firstbyte < 0)      if (firstchar < 0)
7774        firstbyte = find_firstassertedchar(codestart, FALSE);        firstchar = find_firstassertedchar(codestart, FALSE);
7775      if (firstbyte >= 0)   /* Remove caseless flag for non-caseable chars */      if (firstchar >= 0)   /* Remove caseless flag for non-caseable chars */
7776        {        {
7777        int ch = firstbyte & 255;  #ifdef COMPILE_PCRE8
7778        re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&        re->first_char = firstchar & 0xff;
7779           cd->fcc[ch] == ch)? ch : firstbyte;  #else
7780    #ifdef COMPILE_PCRE16
7781          re->first_char = firstchar & 0xffff;
7782    #endif
7783    #endif
7784          if ((firstchar & REQ_CASELESS) != 0 && MAX_255(re->first_char)
7785            && cd->fcc[re->first_char] != re->first_char)
7786            re->flags |= PCRE_FCH_CASELESS;
7787    
7788        re->flags |= PCRE_FIRSTSET;        re->flags |= PCRE_FIRSTSET;
7789        }        }
7790      else if (is_startline(codestart, 0, cd->backref_map))      else if (is_startline(codestart, 0, cd->backref_map))
# Line 7405  if ((re->options & PCRE_ANCHORED) == 0) Line 7796  if ((re->options & PCRE_ANCHORED) == 0)
7796  variable length item in the regex. Remove the caseless flag for non-caseable  variable length item in the regex. Remove the caseless flag for non-caseable
7797  bytes. */  bytes. */
7798    
7799  if (reqbyte >= 0 &&  if (reqchar >= 0 &&
7800       ((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0))       ((re->options & PCRE_ANCHORED) == 0 || (reqchar & REQ_VARY) != 0))
7801    {    {
7802    int ch = reqbyte & 255;  #ifdef COMPILE_PCRE8
7803    re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&    re->req_char = reqchar & 0xff;
7804      cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;  #else
7805    #ifdef COMPILE_PCRE16
7806      re->req_char = reqchar & 0xffff;
7807    #endif
7808    #endif
7809      if ((reqchar & REQ_CASELESS) != 0 && MAX_255(re->req_char)
7810        && cd->fcc[re->req_char] != re->req_char)
7811        re->flags |= PCRE_RCH_CASELESS;
7812    
7813    re->flags |= PCRE_REQCHSET;    re->flags |= PCRE_REQCHSET;
7814    }    }
7815    
# Line 7425  printf("Options=%08x\n", re->options); Line 7824  printf("Options=%08x\n", re->options);
7824    
7825  if ((re->flags & PCRE_FIRSTSET) != 0)  if ((re->flags & PCRE_FIRSTSET) != 0)
7826    {    {
7827    int ch = re->first_byte & 255;    pcre_uchar ch = re->first_char;
7828    const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?    const char *caseless =
7829      "" : " (caseless)";      ((re->flags & PCRE_FCH_CASELESS) == 0)? "" : " (caseless)";
7830    if (isprint(ch)) printf("First char = %c%s\n", ch, caseless);    if (PRINTABLE(ch)) printf("First char = %c%s\n", ch, caseless);
7831      else printf("First char = \\x%02x%s\n", ch, caseless);      else printf("First char = \\x%02x%s\n", ch, caseless);
7832    }    }
7833    
7834  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
7835    {    {
7836    int ch = re->req_byte & 255;    pcre_uchar ch = re->req_char;
7837    const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?    const char *caseless =
7838      "" : " (caseless)";      ((re->flags & PCRE_RCH_CASELESS) == 0)? "" : " (caseless)";
7839    if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless);    if (PRINTABLE(ch)) printf("Req char = %c%s\n", ch, caseless);
7840      else printf("Req char = \\x%02x%s\n", ch, caseless);      else printf("Req char = \\x%02x%s\n", ch, caseless);
7841    }    }
7842    
# Line 7450  if (code - codestart > length) Line 7849  if (code - codestart > length)
7849    {    {
7850    (pcre_free)(re);    (pcre_free)(re);
7851    *errorptr = find_error_text(ERR23);    *errorptr = find_error_text(ERR23);
7852    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (pcre_uchar *)pattern;
7853    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
7854    return NULL;    return NULL;
7855    }    }

Legend:
Removed from v.716  
changed lines
  Added in v.774

  ViewVC Help
Powered by ViewVC 1.1.5