/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

code/trunk/pcre_compile.c revision 613 by ph10, Sat Jul 2 16:59:52 2011 UTC | code/branches/pcre16/pcre_compile.c revision 763 by zherczeg, Tue Nov 22 21:46:22 2011 UTC
# Line 231  static const char posix_names[] = Line 231  static const char posix_names[] =
231    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
232    STRING_word0  STRING_xdigit;    STRING_word0  STRING_xdigit;
233    
234  static const uschar posix_name_lengths[] = {  static const pcre_uint8 posix_name_lengths[] = {
235    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
236    
237  /* Table of class bit maps for each POSIX class. Each class is formed from a  /* Table of class bit maps for each POSIX class. Each class is formed from a
# Line 266  substitutes must be in the order of the Line 266  substitutes must be in the order of the
266  both positive and negative cases. NULL means no substitute. */  both positive and negative cases. NULL means no substitute. */
267    
268  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
269  static const uschar *substitutes[] = {  static const pcre_uchar string_PNd[]  = {
270    (uschar *)"\\P{Nd}",    /* \D */    CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
271    (uschar *)"\\p{Nd}",    /* \d */    CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
272    (uschar *)"\\P{Xsp}",   /* \S */       /* NOTE: Xsp is Perl space */  static const pcre_uchar string_pNd[]  = {
273    (uschar *)"\\p{Xsp}",   /* \s */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
274    (uschar *)"\\P{Xwd}",   /* \W */    CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
275    (uschar *)"\\p{Xwd}"    /* \w */  static const pcre_uchar string_PXsp[] = {
276      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
277      CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
278    static const pcre_uchar string_pXsp[] = {
279      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
280      CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
281    static const pcre_uchar string_PXwd[] = {
282      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
283      CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
284    static const pcre_uchar string_pXwd[] = {
285      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
286      CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
287    
288    static const pcre_uchar *substitutes[] = {
289      string_PNd,           /* \D */
290      string_pNd,           /* \d */
291      string_PXsp,          /* \S */       /* NOTE: Xsp is Perl space */
292      string_pXsp,          /* \s */
293      string_PXwd,          /* \W */
294      string_pXwd           /* \w */
295  };  };
296    
297  static const uschar *posix_substitutes[] = {  static const pcre_uchar string_pL[] =   {
298    (uschar *)"\\p{L}",     /* alpha */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
299    (uschar *)"\\p{Ll}",    /* lower */    CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
300    (uschar *)"\\p{Lu}",    /* upper */  static const pcre_uchar string_pLl[] =  {
301    (uschar *)"\\p{Xan}",   /* alnum */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
302    NULL,                   /* ascii */    CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
303    (uschar *)"\\h",        /* blank */  static const pcre_uchar string_pLu[] =  {
304    NULL,                   /* cntrl */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
305    (uschar *)"\\p{Nd}",    /* digit */    CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
306    NULL,                   /* graph */  static const pcre_uchar string_pXan[] = {
307    NULL,                   /* print */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
308    NULL,                   /* punct */    CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
309    (uschar *)"\\p{Xps}",   /* space */    /* NOTE: Xps is POSIX space */  static const pcre_uchar string_h[] =    {
310    (uschar *)"\\p{Xwd}",   /* word */    CHAR_BACKSLASH, CHAR_h, '\0' };
311    NULL,                   /* xdigit */  static const pcre_uchar string_pXps[] = {
312      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
313      CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
314    static const pcre_uchar string_PL[] =   {
315      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
316      CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
317    static const pcre_uchar string_PLl[] =  {
318      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
319      CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
320    static const pcre_uchar string_PLu[] =  {
321      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
322      CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
323    static const pcre_uchar string_PXan[] = {
324      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
325      CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
326    static const pcre_uchar string_H[] =    {
327      CHAR_BACKSLASH, CHAR_H, '\0' };
328    static const pcre_uchar string_PXps[] = {
329      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
330      CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
331    
332    static const pcre_uchar *posix_substitutes[] = {
333      string_pL,            /* alpha */
334      string_pLl,           /* lower */
335      string_pLu,           /* upper */
336      string_pXan,          /* alnum */
337      NULL,                 /* ascii */
338      string_h,             /* blank */
339      NULL,                 /* cntrl */
340      string_pNd,           /* digit */
341      NULL,                 /* graph */
342      NULL,                 /* print */
343      NULL,                 /* punct */
344      string_pXps,          /* space */    /* NOTE: Xps is POSIX space */
345      string_pXwd,          /* word */
346      NULL,                 /* xdigit */
347    /* Negated cases */    /* Negated cases */
348    (uschar *)"\\P{L}",     /* ^alpha */    string_PL,            /* ^alpha */
349    (uschar *)"\\P{Ll}",    /* ^lower */    string_PLl,           /* ^lower */
350    (uschar *)"\\P{Lu}",    /* ^upper */    string_PLu,           /* ^upper */
351    (uschar *)"\\P{Xan}",   /* ^alnum */    string_PXan,          /* ^alnum */
352    NULL,                   /* ^ascii */    NULL,                 /* ^ascii */
353    (uschar *)"\\H",        /* ^blank */    string_H,             /* ^blank */
354    NULL,                   /* ^cntrl */    NULL,                 /* ^cntrl */
355    (uschar *)"\\P{Nd}",    /* ^digit */    string_PNd,           /* ^digit */
356    NULL,                   /* ^graph */    NULL,                 /* ^graph */
357    NULL,                   /* ^print */    NULL,                 /* ^print */
358    NULL,                   /* ^punct */    NULL,                 /* ^punct */
359    (uschar *)"\\P{Xps}",   /* ^space */   /* NOTE: Xps is POSIX space */    string_PXps,          /* ^space */   /* NOTE: Xps is POSIX space */
360    (uschar *)"\\P{Xwd}",   /* ^word */    string_PXwd,          /* ^word */
361    NULL                    /* ^xdigit */    NULL                  /* ^xdigit */
362  };  };
363  #define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))  #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
364  #endif  #endif
365    
366  #define STRING(a)  # a  #define STRING(a)  # a
# Line 393  static const char error_texts[] = Line 447  static const char error_texts[] =
447    "internal error: previously-checked referenced subpattern not found\0"    "internal error: previously-checked referenced subpattern not found\0"
448    "DEFINE group contains more than one branch\0"    "DEFINE group contains more than one branch\0"
449    /* 55 */    /* 55 */
450    "repeating a DEFINE group is not allowed\0"    "repeating a DEFINE group is not allowed\0"  /** DEAD **/
451    "inconsistent NEWLINE options\0"    "inconsistent NEWLINE options\0"
452    "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"    "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
453    "a numbered reference must not be zero\0"    "a numbered reference must not be zero\0"
# Line 409  static const char error_texts[] = Line 463  static const char error_texts[] =
463    "(*MARK) must have an argument\0"    "(*MARK) must have an argument\0"
464    "this version of PCRE is not compiled with PCRE_UCP support\0"    "this version of PCRE is not compiled with PCRE_UCP support\0"
465    "\\c must be followed by an ASCII character\0"    "\\c must be followed by an ASCII character\0"
466      "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
467      /* 70 */
468      "internal error: unknown opcode in find_fixedlength()\0"
469    ;    ;
470    
471  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 545  static const unsigned char ebcdic_charta Line 602  static const unsigned char ebcdic_charta
602  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
603    
604  static BOOL  static BOOL
605    compile_regex(int, uschar **, const uschar **, int *, BOOL, BOOL, int, int *,    compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
606      int *, branch_chain *, compile_data *, int *);      int *, int *, branch_chain *, compile_data *, int *);
607    
608    
609    
# Line 577  return s; Line 634  return s;
634    
635    
636  /*************************************************  /*************************************************
637    *            Check for counted repeat            *
638    *************************************************/
639    
640    /* This function is called when a '{' is encountered in a place where it might
641    start a quantifier. It looks ahead to see if it really is a quantifier or not.
642    It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
643    where the ddds are digits.
644    
645    Arguments:
646      p         pointer to the first char after '{'
647    
648    Returns:    TRUE or FALSE
649    */
650    
651    static BOOL
652    is_counted_repeat(const pcre_uchar *p)
653    {
654    if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
655    while ((digitab[*p] & ctype_digit) != 0) p++;
656    if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
657    
658    if (*p++ != CHAR_COMMA) return FALSE;
659    if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
660    
661    if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
662    while ((digitab[*p] & ctype_digit) != 0) p++;
663    
664    return (*p == CHAR_RIGHT_CURLY_BRACKET);
665    }
666    
667    
668    
669    /*************************************************
670  *            Handle escapes                      *  *            Handle escapes                      *
671  *************************************************/  *************************************************/
672    
# Line 601  Returns:         zero or positive => a d Line 691  Returns:         zero or positive => a d
691  */  */
692    
693  static int  static int
694  check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,  check_escape(const pcre_uchar **ptrptr, int *errorcodeptr, int bracount,
695    int options, BOOL isclass)    int options, BOOL isclass)
696  {  {
697  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
698  const uschar *ptr = *ptrptr + 1;  const pcre_uchar *ptr = *ptrptr + 1;
699  int c, i;  int c, i;
700    
701  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */
# Line 632  else if ((i = escapes[c - 0x48]) != 0) Line 722  else if ((i = escapes[c - 0x48]) != 0)
722    
723  else  else
724    {    {
725    const uschar *oldptr;    const pcre_uchar *oldptr;
726    BOOL braced, negated;    BOOL braced, negated;
727    
728    switch (c)    switch (c)
# Line 642  else Line 732  else
732    
733      case CHAR_l:      case CHAR_l:
734      case CHAR_L:      case CHAR_L:
735        *errorcodeptr = ERR37;
736        break;
737    
738      case CHAR_u:      case CHAR_u:
739        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
740          {
741          /* In JavaScript, \u must be followed by four hexadecimal numbers.
742          Otherwise it is a lowercase u letter. */
743          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
744               && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
745            {
746            c = 0;
747            for (i = 0; i < 4; ++i)
748              {
749              register int cc = *(++ptr);
750    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
751              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
752              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
753    #else           /* EBCDIC coding */
754              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
755              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
756    #endif
757              }
758            }
759          }
760        else
761          *errorcodeptr = ERR37;
762        break;
763    
764      case CHAR_U:      case CHAR_U:
765      *errorcodeptr = ERR37;      /* In JavaScript, \U is an uppercase U letter. */
766        if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
767      break;      break;
768    
769      /* \g must be followed by one of a number of specific things:      /* In a character class, \g is just a literal "g". Outside a character
770        class, \g must be followed by one of a number of specific things:
771    
772      (1) A number, either plain or braced. If positive, it is an absolute      (1) A number, either plain or braced. If positive, it is an absolute
773      backreference. If negative, it is a relative backreference. This is a Perl      backreference. If negative, it is a relative backreference. This is a Perl
# Line 664  else Line 784  else
784      the -ESC_g code (cf \k). */      the -ESC_g code (cf \k). */
785    
786      case CHAR_g:      case CHAR_g:
787        if (isclass) break;
788      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
789        {        {
790        c = -ESC_g;        c = -ESC_g;
# Line 674  else Line 795  else
795    
796      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
797        {        {
798        const uschar *p;        const pcre_uchar *p;
799        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
800          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
801        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
# Line 792  else Line 913  else
913      treated as a data character. */      treated as a data character. */
914    
915      case CHAR_x:      case CHAR_x:
916        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
917          {
918          /* In JavaScript, \x must be followed by two hexadecimal numbers.
919          Otherwise it is a lowercase x letter. */
920          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
921            {
922            c = 0;
923            for (i = 0; i < 2; ++i)
924              {
925              register int cc = *(++ptr);
926    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
927              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
928              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
929    #else           /* EBCDIC coding */
930              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
931              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
932    #endif
933              }
934            }
935          break;
936          }
937    
938      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
939        {        {
940        const uschar *pt = ptr + 2;        const pcre_uchar *pt = ptr + 2;
941        int count = 0;        int count = 0;
942    
943        c = 0;        c = 0;
# Line 885  else Line 1028  else
1028    }    }
1029    
1030  /* Perl supports \N{name} for character names, as well as plain \N for "not  /* Perl supports \N{name} for character names, as well as plain \N for "not
1031  newline". PCRE does not support \N{name}. */  newline". PCRE does not support \N{name}. However, it does support
1032    quantification such as \N{2,3}. */
1033    
1034  if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET)  if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET &&
1035         !is_counted_repeat(ptr+2))
1036    *errorcodeptr = ERR37;    *errorcodeptr = ERR37;
1037    
1038  /* If PCRE_UCP is set, we change the values for \d etc. */  /* If PCRE_UCP is set, we change the values for \d etc. */
# Line 923  Returns:         type value from ucp_typ Line 1068  Returns:         type value from ucp_typ
1068  */  */
1069    
1070  static int  static int
1071  get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
1072  {  {
1073  int c, i, bot, top;  int c, i, bot, top;
1074  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
1075  char name[32];  pcre_uchar name[32];
1076    
1077  c = *(++ptr);  c = *(++ptr);
1078  if (c == 0) goto ERROR_RETURN;  if (c == 0) goto ERROR_RETURN;
# Line 973  top = _pcre_utt_size; Line 1118  top = _pcre_utt_size;
1118  while (bot < top)  while (bot < top)
1119    {    {
1120    i = (bot + top) >> 1;    i = (bot + top) >> 1;
1121    c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);    c = STRCMP_UC_C8(name, _pcre_utt_names + _pcre_utt[i].name_offset);
1122    if (c == 0)    if (c == 0)
1123      {      {
1124      *dptr = _pcre_utt[i].value;      *dptr = _pcre_utt[i].value;
# Line 997  return -1; Line 1142  return -1;
1142    
1143    
1144  /*************************************************  /*************************************************
 *            Check for counted repeat            *  
 *************************************************/  
   
 /* This function is called when a '{' is encountered in a place where it might  
 start a quantifier. It looks ahead to see if it really is a quantifier or not.  
 It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}  
 where the ddds are digits.  
   
 Arguments:  
   p         pointer to the first char after '{'  
   
 Returns:    TRUE or FALSE  
 */  
   
 static BOOL  
 is_counted_repeat(const uschar *p)  
 {  
 if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  
 while ((digitab[*p] & ctype_digit) != 0) p++;  
 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;  
   
 if (*p++ != CHAR_COMMA) return FALSE;  
 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;  
   
 if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  
 while ((digitab[*p] & ctype_digit) != 0) p++;  
   
 return (*p == CHAR_RIGHT_CURLY_BRACKET);  
 }  
   
   
   
 /*************************************************  
1145  *         Read repeat counts                     *  *         Read repeat counts                     *
1146  *************************************************/  *************************************************/
1147    
# Line 1048  Returns:         pointer to '}' on succe Line 1160  Returns:         pointer to '}' on succe
1160                   current ptr on error, with errorcodeptr set non-zero                   current ptr on error, with errorcodeptr set non-zero
1161  */  */
1162    
1163  static const uschar *  static const pcre_uchar *
1164  read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr)  read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr)
1165  {  {
1166  int min = 0;  int min = 0;
1167  int max = -1;  int max = -1;
# Line 1134  Returns:       the number of the named s Line 1246  Returns:       the number of the named s
1246  */  */
1247    
1248  static int  static int
1249  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn,
1250    BOOL xmode, BOOL utf8, int *count)    BOOL xmode, BOOL utf8, int *count)
1251  {  {
1252  uschar *ptr = *ptrptr;  pcre_uchar *ptr = *ptrptr;
1253  int start_count = *count;  int start_count = *count;
1254  int hwm_count = start_count;  int hwm_count = start_count;
1255  BOOL dup_parens = FALSE;  BOOL dup_parens = FALSE;
# Line 1204  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1316  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1316          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1317        {        {
1318        int term;        int term;
1319        const uschar *thisname;        const pcre_uchar *thisname;
1320        *count += 1;        *count += 1;
1321        if (name == NULL && *count == lorn) return *count;        if (name == NULL && *count == lorn) return *count;
1322        term = *ptr++;        term = *ptr++;
# Line 1212  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1324  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1324        thisname = ptr;        thisname = ptr;
1325        while (*ptr != term) ptr++;        while (*ptr != term) ptr++;
1326        if (name != NULL && lorn == ptr - thisname &&        if (name != NULL && lorn == ptr - thisname &&
1327            strncmp((const char *)name, (const char *)thisname, lorn) == 0)            STRNCMP_UC_UC(name, thisname, lorn) == 0)
1328          return *count;          return *count;
1329        term++;        term++;
1330        }        }
# Line 1255  for (; ptr < cd->end_pattern; ptr++) Line 1367  for (; ptr < cd->end_pattern; ptr++)
1367          {          {
1368          if (ptr[2] == CHAR_E)          if (ptr[2] == CHAR_E)
1369            ptr+= 2;            ptr+= 2;
1370          else if (strncmp((const char *)ptr+2,          else if (STRNCMP_UC_C8(ptr + 2,
1371                   STR_Q STR_BACKSLASH STR_E, 3) == 0)                   STR_Q STR_BACKSLASH STR_E, 3) == 0)
1372            ptr += 4;            ptr += 4;
1373          else          else
# Line 1367  Returns:       the number of the found s Line 1479  Returns:       the number of the found s
1479  */  */
1480    
1481  static int  static int
1482  find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode,  find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode,
1483    BOOL utf8)    BOOL utf8)
1484  {  {
1485  uschar *ptr = (uschar *)cd->start_pattern;  pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern;
1486  int count = 0;  int count = 0;
1487  int rc;  int rc;
1488    
# Line 1408  Arguments: Line 1520  Arguments:
1520  Returns:       pointer to the first significant opcode  Returns:       pointer to the first significant opcode
1521  */  */
1522    
1523  static const uschar*  static const pcre_uchar*
1524  first_significant_code(const uschar *code, BOOL skipassert)  first_significant_code(const pcre_uchar *code, BOOL skipassert)
1525  {  {
1526  for (;;)  for (;;)
1527    {    {
# Line 1470  Arguments: Line 1582  Arguments:
1582    
1583  Returns:   the fixed length,  Returns:   the fixed length,
1584               or -1 if there is no fixed length,               or -1 if there is no fixed length,
1585               or -2 if \C was encountered               or -2 if \C was encountered (in UTF-8 mode only)
1586               or -3 if an OP_RECURSE item was encountered and atend is FALSE               or -3 if an OP_RECURSE item was encountered and atend is FALSE
1587                 or -4 if an unknown opcode was encountered (internal error)
1588  */  */
1589    
1590  static int  static int
1591  find_fixedlength(uschar *code, BOOL utf8, BOOL atend, compile_data *cd)  find_fixedlength(pcre_uchar *code, BOOL utf8, BOOL atend, compile_data *cd)
1592  {  {
1593  int length = -1;  int length = -1;
1594    
1595  register int branchlength = 0;  register int branchlength = 0;
1596  register uschar *cc = code + 1 + LINK_SIZE;  register pcre_uchar *cc = code + 1 + LINK_SIZE;
1597    
1598  /* Scan along the opcodes for this branch. If we get to the end of the  /* Scan along the opcodes for this branch. If we get to the end of the
1599  branch, check the length against that of the other branches. */  branch, check the length against that of the other branches. */
# Line 1488  branch, check the length against that of Line 1601  branch, check the length against that of
1601  for (;;)  for (;;)
1602    {    {
1603    int d;    int d;
1604    uschar *ce, *cs;    pcre_uchar *ce, *cs;
1605    register int op = *cc;    register int op = *cc;
1606    switch (op)    switch (op)
1607      {      {
1608      /* We only need to continue for OP_CBRA (normal capturing bracket) and      /* We only need to continue for OP_CBRA (normal capturing bracket) and
1609      OP_BRA (normal non-capturing bracket) because the other variants of these      OP_BRA (normal non-capturing bracket) because the other variants of these
1610      opcodes are all concerned with unlimited repeated groups, which of course      opcodes are all concerned with unlimited repeated groups, which of course
1611      are not of fixed length. They will cause a -1 response from the default      are not of fixed length. */
     case of this switch. */  
1612    
1613      case OP_CBRA:      case OP_CBRA:
1614      case OP_BRA:      case OP_BRA:
1615      case OP_ONCE:      case OP_ONCE:
1616        case OP_ONCE_NC:
1617      case OP_COND:      case OP_COND:
1618      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);
1619      if (d < 0) return d;      if (d < 0) return d;
# Line 1509  for (;;) Line 1622  for (;;)
1622      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1623      break;      break;
1624    
1625      /* Reached end of a branch; if it's a ket it is the end of a nested      /* Reached end of a branch; if it's a ket it is the end of a nested call.
1626      call. If it's ALT it is an alternation in a nested call. If it is      If it's ALT it is an alternation in a nested call. An ACCEPT is effectively
1627      END it's the end of the outer call. All can be handled by the same code.      an ALT. If it is END it's the end of the outer call. All can be handled by
1628      Note that we must not include the OP_KETRxxx opcodes here, because they      the same code. Note that we must not include the OP_KETRxxx opcodes here,
1629      all imply an unlimited repeat. */      because they all imply an unlimited repeat. */
1630    
1631      case OP_ALT:      case OP_ALT:
1632      case OP_KET:      case OP_KET:
1633      case OP_END:      case OP_END:
1634        case OP_ACCEPT:
1635        case OP_ASSERT_ACCEPT:
1636      if (length < 0) length = branchlength;      if (length < 0) length = branchlength;
1637        else if (length != branchlength) return -1;        else if (length != branchlength) return -1;
1638      if (*cc != OP_ALT) return length;      if (*cc != OP_ALT) return length;
# Line 1531  for (;;) Line 1646  for (;;)
1646    
1647      case OP_RECURSE:      case OP_RECURSE:
1648      if (!atend) return -3;      if (!atend) return -3;
1649      cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1650      do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
1651      if (cc > cs && cc < ce) return -1;                /* Recursion */      if (cc > cs && cc < ce) return -1;                    /* Recursion */
1652      d = find_fixedlength(cs + 2, utf8, atend, cd);      d = find_fixedlength(cs + 2, utf8, atend, cd);
1653      if (d < 0) return d;      if (d < 0) return d;
1654      branchlength += d;      branchlength += d;
# Line 1551  for (;;) Line 1666  for (;;)
1666    
1667      /* Skip over things that don't match chars */      /* Skip over things that don't match chars */
1668    
1669      case OP_REVERSE:      case OP_MARK:
1670      case OP_CREF:      case OP_PRUNE_ARG:
1671      case OP_NCREF:      case OP_SKIP_ARG:
1672      case OP_RREF:      case OP_THEN_ARG:
1673      case OP_NRREF:      cc += cc[1] + _pcre_OP_lengths[*cc];
1674      case OP_DEF:      break;
1675    
1676      case OP_CALLOUT:      case OP_CALLOUT:
     case OP_SOD:  
     case OP_SOM:  
     case OP_SET_SOM:  
     case OP_EOD:  
     case OP_EODN:  
1677      case OP_CIRC:      case OP_CIRC:
1678      case OP_CIRCM:      case OP_CIRCM:
1679        case OP_CLOSE:
1680        case OP_COMMIT:
1681        case OP_CREF:
1682        case OP_DEF:
1683      case OP_DOLL:      case OP_DOLL:
1684      case OP_DOLLM:      case OP_DOLLM:
1685        case OP_EOD:
1686        case OP_EODN:
1687        case OP_FAIL:
1688        case OP_NCREF:
1689        case OP_NRREF:
1690      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
1691        case OP_PRUNE:
1692        case OP_REVERSE:
1693        case OP_RREF:
1694        case OP_SET_SOM:
1695        case OP_SKIP:
1696        case OP_SOD:
1697        case OP_SOM:
1698        case OP_THEN:
1699      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
1700      cc += _pcre_OP_lengths[*cc];      cc += _pcre_OP_lengths[*cc];
1701      break;      break;
# Line 1589  for (;;) Line 1717  for (;;)
1717      need to skip over a multibyte character in UTF8 mode.  */      need to skip over a multibyte character in UTF8 mode.  */
1718    
1719      case OP_EXACT:      case OP_EXACT:
1720        case OP_EXACTI:
1721        case OP_NOTEXACT:
1722        case OP_NOTEXACTI:
1723      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1724      cc += 4;      cc += 4;
1725  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1609  for (;;) Line 1740  for (;;)
1740      cc += 2;      cc += 2;
1741      /* Fall through */      /* Fall through */
1742    
1743        case OP_HSPACE:
1744        case OP_VSPACE:
1745        case OP_NOT_HSPACE:
1746        case OP_NOT_VSPACE:
1747      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1748      case OP_DIGIT:      case OP_DIGIT:
1749      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
# Line 1621  for (;;) Line 1756  for (;;)
1756      cc++;      cc++;
1757      break;      break;
1758    
1759      /* The single-byte matcher isn't allowed */      /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
1760        otherwise \C is coded as OP_ALLANY. */
1761    
1762      case OP_ANYBYTE:      case OP_ANYBYTE:
1763      return -2;      return -2;
# Line 1640  for (;;) Line 1776  for (;;)
1776    
1777      switch (*cc)      switch (*cc)
1778        {        {
1779          case OP_CRPLUS:
1780          case OP_CRMINPLUS:
1781        case OP_CRSTAR:        case OP_CRSTAR:
1782        case OP_CRMINSTAR:        case OP_CRMINSTAR:
1783        case OP_CRQUERY:        case OP_CRQUERY:
# Line 1660  for (;;) Line 1798  for (;;)
1798    
1799      /* Anything else is variable length */      /* Anything else is variable length */
1800    
1801      default:      case OP_ANYNL:
1802        case OP_BRAMINZERO:
1803        case OP_BRAPOS:
1804        case OP_BRAPOSZERO:
1805        case OP_BRAZERO:
1806        case OP_CBRAPOS:
1807        case OP_EXTUNI:
1808        case OP_KETRMAX:
1809        case OP_KETRMIN:
1810        case OP_KETRPOS:
1811        case OP_MINPLUS:
1812        case OP_MINPLUSI:
1813        case OP_MINQUERY:
1814        case OP_MINQUERYI:
1815        case OP_MINSTAR:
1816        case OP_MINSTARI:
1817        case OP_MINUPTO:
1818        case OP_MINUPTOI:
1819        case OP_NOTMINPLUS:
1820        case OP_NOTMINPLUSI:
1821        case OP_NOTMINQUERY:
1822        case OP_NOTMINQUERYI:
1823        case OP_NOTMINSTAR:
1824        case OP_NOTMINSTARI:
1825        case OP_NOTMINUPTO:
1826        case OP_NOTMINUPTOI:
1827        case OP_NOTPLUS:
1828        case OP_NOTPLUSI:
1829        case OP_NOTPOSPLUS:
1830        case OP_NOTPOSPLUSI:
1831        case OP_NOTPOSQUERY:
1832        case OP_NOTPOSQUERYI:
1833        case OP_NOTPOSSTAR:
1834        case OP_NOTPOSSTARI:
1835        case OP_NOTPOSUPTO:
1836        case OP_NOTPOSUPTOI:
1837        case OP_NOTQUERY:
1838        case OP_NOTQUERYI:
1839        case OP_NOTSTAR:
1840        case OP_NOTSTARI:
1841        case OP_NOTUPTO:
1842        case OP_NOTUPTOI:
1843        case OP_PLUS:
1844        case OP_PLUSI:
1845        case OP_POSPLUS:
1846        case OP_POSPLUSI:
1847        case OP_POSQUERY:
1848        case OP_POSQUERYI:
1849        case OP_POSSTAR:
1850        case OP_POSSTARI:
1851        case OP_POSUPTO:
1852        case OP_POSUPTOI:
1853        case OP_QUERY:
1854        case OP_QUERYI:
1855        case OP_REF:
1856        case OP_REFI:
1857        case OP_SBRA:
1858        case OP_SBRAPOS:
1859        case OP_SCBRA:
1860        case OP_SCBRAPOS:
1861        case OP_SCOND:
1862        case OP_SKIPZERO:
1863        case OP_STAR:
1864        case OP_STARI:
1865        case OP_TYPEMINPLUS:
1866        case OP_TYPEMINQUERY:
1867        case OP_TYPEMINSTAR:
1868        case OP_TYPEMINUPTO:
1869        case OP_TYPEPLUS:
1870        case OP_TYPEPOSPLUS:
1871        case OP_TYPEPOSQUERY:
1872        case OP_TYPEPOSSTAR:
1873        case OP_TYPEPOSUPTO:
1874        case OP_TYPEQUERY:
1875        case OP_TYPESTAR:
1876        case OP_TYPEUPTO:
1877        case OP_UPTO:
1878        case OP_UPTOI:
1879      return -1;      return -1;
1880    
1881        /* Catch unrecognized opcodes so that when new ones are added they
1882        are not forgotten, as has happened in the past. */
1883    
1884        default:
1885        return -4;
1886      }      }
1887    }    }
1888  /* Control never gets here */  /* Control never gets here */
# Line 1688  Arguments: Line 1909  Arguments:
1909  Returns:      pointer to the opcode for the bracket, or NULL if not found  Returns:      pointer to the opcode for the bracket, or NULL if not found
1910  */  */
1911    
1912  const uschar *  const pcre_uchar *
1913  _pcre_find_bracket(const uschar *code, BOOL utf8, int number)  _pcre_find_bracket(const pcre_uchar *code, BOOL utf8, int number)
1914  {  {
1915  for (;;)  for (;;)
1916    {    {
1917    register int c = *code;    register int c = *code;
1918    
1919    if (c == OP_END) return NULL;    if (c == OP_END) return NULL;
1920    
1921    /* XCLASS is used for classes that cannot be represented just by a bit    /* XCLASS is used for classes that cannot be represented just by a bit
# Line 1706  for (;;) Line 1928  for (;;)
1928    
1929    else if (c == OP_REVERSE)    else if (c == OP_REVERSE)
1930      {      {
1931      if (number < 0) return (uschar *)code;      if (number < 0) return (pcre_uchar *)code;
1932      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1933      }      }
1934    
# Line 1716  for (;;) Line 1938  for (;;)
1938             c == OP_CBRAPOS || c == OP_SCBRAPOS)             c == OP_CBRAPOS || c == OP_SCBRAPOS)
1939      {      {
1940      int n = GET2(code, 1+LINK_SIZE);      int n = GET2(code, 1+LINK_SIZE);
1941      if (n == number) return (uschar *)code;      if (n == number) return (pcre_uchar *)code;
1942      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1943      }      }
1944    
# Line 1755  for (;;) Line 1977  for (;;)
1977        break;        break;
1978    
1979        case OP_THEN_ARG:        case OP_THEN_ARG:
1980        code += code[1+LINK_SIZE];        code += code[1];
1981        break;        break;
1982        }        }
1983    
# Line 1824  Arguments: Line 2046  Arguments:
2046  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found
2047  */  */
2048    
2049  static const uschar *  static const pcre_uchar *
2050  find_recurse(const uschar *code, BOOL utf8)  find_recurse(const pcre_uchar *code, BOOL utf8)
2051  {  {
2052  for (;;)  for (;;)
2053    {    {
# Line 1874  for (;;) Line 2096  for (;;)
2096        break;        break;
2097    
2098        case OP_THEN_ARG:        case OP_THEN_ARG:
2099        code += code[1+LINK_SIZE];        code += code[1];
2100        break;        break;
2101        }        }
2102    
# Line 1951  Returns:      TRUE if what is matched co Line 2173  Returns:      TRUE if what is matched co
2173  */  */
2174    
2175  static BOOL  static BOOL
2176  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2177    compile_data *cd)    BOOL utf8, compile_data *cd)
2178  {  {
2179  register int c;  register int c;
2180  for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);  for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
2181       code < endcode;       code < endcode;
2182       code = first_significant_code(code + _pcre_OP_lengths[c], TRUE))       code = first_significant_code(code + _pcre_OP_lengths[c], TRUE))
2183    {    {
2184    const uschar *ccode;    const pcre_uchar *ccode;
2185    
2186    c = *code;    c = *code;
2187    
# Line 1974  for (code = first_significant_code(code Line 2196  for (code = first_significant_code(code
2196      }      }
2197    
2198    /* For a recursion/subroutine call, if its end has been reached, which    /* For a recursion/subroutine call, if its end has been reached, which
2199    implies a subroutine call, we can scan it. */    implies a backward reference subroutine call, we can scan it. If it's a
2200      forward reference subroutine call, we can't. To detect forward reference
2201      we have to scan up the list that is kept in the workspace. This function is
2202      called only when doing the real compile, not during the pre-compile that
2203      measures the size of the compiled pattern. */
2204    
2205    if (c == OP_RECURSE)    if (c == OP_RECURSE)
2206      {      {
2207      BOOL empty_branch = FALSE;      const pcre_uchar *scode;
2208      const uschar *scode = cd->start_code + GET(code, 1);      BOOL empty_branch;
2209    
2210        /* Test for forward reference */
2211    
2212        for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
2213          if (GET(scode, 0) == code + 1 - cd->start_code) return TRUE;
2214    
2215        /* Not a forward reference, test for completed backward reference */
2216    
2217        empty_branch = FALSE;
2218        scode = cd->start_code + GET(code, 1);
2219      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
2220    
2221        /* Completed backwards reference */
2222    
2223      do      do
2224        {        {
2225        if (could_be_empty_branch(scode, endcode, utf8, cd))        if (could_be_empty_branch(scode, endcode, utf8, cd))
# Line 1991  for (code = first_significant_code(code Line 2230  for (code = first_significant_code(code
2230        scode += GET(scode, 1);        scode += GET(scode, 1);
2231        }        }
2232      while (*scode == OP_ALT);      while (*scode == OP_ALT);
2233    
2234      if (!empty_branch) return FALSE;  /* All branches are non-empty */      if (!empty_branch) return FALSE;  /* All branches are non-empty */
2235      continue;      continue;
2236      }      }
# Line 2021  for (code = first_significant_code(code Line 2261  for (code = first_significant_code(code
2261    
2262    if (c == OP_BRA  || c == OP_BRAPOS ||    if (c == OP_BRA  || c == OP_BRAPOS ||
2263        c == OP_CBRA || c == OP_CBRAPOS ||        c == OP_CBRA || c == OP_CBRAPOS ||
2264        c == OP_ONCE || c == OP_COND)        c == OP_ONCE || c == OP_ONCE_NC ||
2265          c == OP_COND)
2266      {      {
2267      BOOL empty_branch;      BOOL empty_branch;
2268      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
# Line 2193  for (code = first_significant_code(code Line 2434  for (code = first_significant_code(code
2434      break;      break;
2435    
2436      case OP_THEN_ARG:      case OP_THEN_ARG:
2437      code += code[1+LINK_SIZE];      code += code[1];
2438      break;      break;
2439    
2440      /* None of the remaining opcodes are required to match a character. */      /* None of the remaining opcodes are required to match a character. */
# Line 2216  return TRUE; Line 2457  return TRUE;
2457  the current branch of the current pattern to see if it could match the empty  the current branch of the current pattern to see if it could match the empty
2458  string. If it could, we must look outwards for branches at other levels,  string. If it could, we must look outwards for branches at other levels,
2459  stopping when we pass beyond the bracket which is the subject of the recursion.  stopping when we pass beyond the bracket which is the subject of the recursion.
2460    This function is called only during the real compile, not during the
2461    pre-compile.
2462    
2463  Arguments:  Arguments:
2464    code        points to start of the recursion    code        points to start of the recursion
# Line 2228  Returns:      TRUE if what is matched co Line 2471  Returns:      TRUE if what is matched co
2471  */  */
2472    
2473  static BOOL  static BOOL
2474  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
2475    BOOL utf8, compile_data *cd)    branch_chain *bcptr, BOOL utf8, compile_data *cd)
2476  {  {
2477  while (bcptr != NULL && bcptr->current_branch >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2478    {    {
# Line 2266  where Perl recognizes it as the POSIX cl Line 2509  where Perl recognizes it as the POSIX cl
2509  "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,  "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
2510  I think.  I think.
2511    
2512    A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
2513    It seems that the appearance of a nested POSIX class supersedes an apparent
2514    external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or
2515    a digit.
2516    
2517    In Perl, unescaped square brackets may also appear as part of class names. For
2518    example, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for
2519    [:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not
2520    seem right at all. PCRE does not allow closing square brackets in POSIX class
2521    names.
2522    
2523  Arguments:  Arguments:
2524    ptr      pointer to the initial [    ptr      pointer to the initial [
2525    endptr   where to return the end pointer    endptr   where to return the end pointer
# Line 2274  Returns:   TRUE or FALSE Line 2528  Returns:   TRUE or FALSE
2528  */  */
2529    
2530  static BOOL  static BOOL
2531  check_posix_syntax(const uschar *ptr, const uschar **endptr)  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
2532  {  {
2533  int terminator;          /* Don't combine these lines; the Solaris cc */  int terminator;          /* Don't combine these lines; the Solaris cc */
2534  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
2535  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != 0; ptr++)
2536    {    {
2537    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2538        ptr++;
2539      else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
2540      else
2541      {      {
     if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;  
2542      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2543        {        {
2544        *endptr = ptr;        *endptr = ptr;
2545        return TRUE;        return TRUE;
2546        }        }
2547        if (*ptr == CHAR_LEFT_SQUARE_BRACKET &&
2548             (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
2549              ptr[1] == CHAR_EQUALS_SIGN) &&
2550            check_posix_syntax(ptr, endptr))
2551          return FALSE;
2552      }      }
2553    }    }
2554  return FALSE;  return FALSE;
# Line 2311  Returns:     a value representing the na Line 2572  Returns:     a value representing the na
2572  */  */
2573    
2574  static int  static int
2575  check_posix_name(const uschar *ptr, int len)  check_posix_name(const pcre_uchar *ptr, int len)
2576  {  {
2577  const char *pn = posix_names;  const char *pn = posix_names;
2578  register int yield = 0;  register int yield = 0;
2579  while (posix_name_lengths[yield] != 0)  while (posix_name_lengths[yield] != 0)
2580    {    {
2581    if (len == posix_name_lengths[yield] &&    if (len == posix_name_lengths[yield] &&
2582      strncmp((const char *)ptr, pn, len) == 0) return yield;      STRNCMP_UC_C8(ptr, pn, len) == 0) return yield;
2583    pn += posix_name_lengths[yield] + 1;    pn += posix_name_lengths[yield] + 1;
2584    yield++;    yield++;
2585    }    }
# Line 2358  Returns:     nothing Line 2619  Returns:     nothing
2619  */  */
2620    
2621  static void  static void
2622  adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf8, compile_data *cd,
2623    uschar *save_hwm)    pcre_uchar *save_hwm)
2624  {  {
2625  uschar *ptr = group;  pcre_uchar *ptr = group;
2626    
2627  while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf8)) != NULL)
2628    {    {
2629    int offset;    int offset;
2630    uschar *hc;    pcre_uchar *hc;
2631    
2632    /* See if this recursion is on the forward reference list. If so, adjust the    /* See if this recursion is on the forward reference list. If so, adjust the
2633    reference. */    reference. */
# Line 2411  Arguments: Line 2672  Arguments:
2672  Returns:         new code pointer  Returns:         new code pointer
2673  */  */
2674    
2675  static uschar *  static pcre_uchar *
2676  auto_callout(uschar *code, const uschar *ptr, compile_data *cd)  auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd)
2677  {  {
2678  *code++ = OP_CALLOUT;  *code++ = OP_CALLOUT;
2679  *code++ = 255;  *code++ = 255;
2680  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */
2681  PUT(code, LINK_SIZE, 0);                       /* Default length */  PUT(code, LINK_SIZE, 0);                       /* Default length */
2682  return code + 2*LINK_SIZE;  return code + 2 * LINK_SIZE;
2683  }  }
2684    
2685    
# Line 2440  Returns:             nothing Line 2701  Returns:             nothing
2701  */  */
2702    
2703  static void  static void
2704  complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)  complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd)
2705  {  {
2706  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
2707  PUT(previous_callout, 2 + LINK_SIZE, length);  PUT(previous_callout, 2 + LINK_SIZE, length);
# Line 2578  Returns:        TRUE if possessifying is Line 2839  Returns:        TRUE if possessifying is
2839  */  */
2840    
2841  static BOOL  static BOOL
2842  check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr,  check_auto_possessive(const pcre_uchar *previous, BOOL utf8,
2843    int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
2844  {  {
2845  int c, next;  int c, next;
2846  int op_code = *previous++;  int op_code = *previous++;
# Line 2654  if ((options & PCRE_EXTENDED) != 0) Line 2915  if ((options & PCRE_EXTENDED) != 0)
2915  /* If the next thing is itself optional, we have to give up. */  /* If the next thing is itself optional, we have to give up. */
2916    
2917  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2918    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)    STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2919      return FALSE;      return FALSE;
2920    
2921  /* Now compare the next item with the previous opcode. First, handle cases when  /* Now compare the next item with the previous opcode. First, handle cases when
# Line 2916  switch(op_code) Line 3177  switch(op_code)
3177        to the original \d etc. At this point, ptr will point to a zero byte. */        to the original \d etc. At this point, ptr will point to a zero byte. */
3178    
3179        if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||        if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
3180          strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)          STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
3181            return FALSE;            return FALSE;
3182    
3183        /* Do the property check. */        /* Do the property check. */
# Line 2997  Arguments: Line 3258  Arguments:
3258    firstbyteptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)    firstbyteptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
3259    reqbyteptr     set to the last literal character required, else < 0    reqbyteptr     set to the last literal character required, else < 0
3260    bcptr          points to current branch chain    bcptr          points to current branch chain
3261      cond_depth     conditional nesting depth
3262    cd             contains pointers to tables etc.    cd             contains pointers to tables etc.
3263    lengthptr      NULL during the real compile phase    lengthptr      NULL during the real compile phase
3264                   points to length accumulator during pre-compile phase                   points to length accumulator during pre-compile phase
# Line 3006  Returns:         TRUE on success Line 3268  Returns:         TRUE on success
3268  */  */
3269    
3270  static BOOL  static BOOL
3271  compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,  compile_branch(int *optionsptr, pcre_uchar **codeptr,
3272    int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    const pcre_uchar **ptrptr, int *errorcodeptr, int *firstbyteptr,
3273    compile_data *cd, int *lengthptr)    int *reqbyteptr, branch_chain *bcptr, int cond_depth, compile_data *cd,
3274      int *lengthptr)
3275  {  {
3276  int repeat_type, op_type;  int repeat_type, op_type;
3277  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
# Line 3017  int greedy_default, greedy_non_default; Line 3280  int greedy_default, greedy_non_default;
3280  int firstbyte, reqbyte;  int firstbyte, reqbyte;
3281  int zeroreqbyte, zerofirstbyte;  int zeroreqbyte, zerofirstbyte;
3282  int req_caseopt, reqvary, tempreqvary;  int req_caseopt, reqvary, tempreqvary;
3283  int options = *optionsptr;  int options = *optionsptr;               /* May change dynamically */
3284  int after_manual_callout = 0;  int after_manual_callout = 0;
3285  int length_prevgroup = 0;  int length_prevgroup = 0;
3286  register int c;  register int c;
3287  register uschar *code = *codeptr;  register pcre_uchar *code = *codeptr;
3288  uschar *last_code = code;  pcre_uchar *last_code = code;
3289  uschar *orig_code = code;  pcre_uchar *orig_code = code;
3290  uschar *tempcode;  pcre_uchar *tempcode;
3291  BOOL inescq = FALSE;  BOOL inescq = FALSE;
3292  BOOL groupsetfirstbyte = FALSE;  BOOL groupsetfirstbyte = FALSE;
3293  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
3294  const uschar *tempptr;  const pcre_uchar *tempptr;
3295  const uschar *nestptr = NULL;  const pcre_uchar *nestptr = NULL;
3296  uschar *previous = NULL;  pcre_uchar *previous = NULL;
3297  uschar *previous_callout = NULL;  pcre_uchar *previous_callout = NULL;
3298  uschar *save_hwm = NULL;  pcre_uchar *save_hwm = NULL;
3299  uschar classbits[32];  pcre_uchar classbits[32];
3300    
3301    /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
3302    must not do this for other options (e.g. PCRE_EXTENDED) because they may change
3303    dynamically as we process the pattern. */
3304    
3305  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3306  BOOL class_utf8;  BOOL class_utf8;
3307  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
3308  uschar *class_utf8data;  pcre_uint8 *class_utf8data;
3309  uschar *class_utf8data_base;  pcre_uint8 *class_utf8data_base;
3310  uschar utf8_char[6];  pcre_uint8 utf8_char[6];
3311  #else  #else
3312  BOOL utf8 = FALSE;  BOOL utf8 = FALSE;
 uschar *utf8_char = NULL;  
3313  #endif  #endif
3314    
3315  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 3094  for (;; ptr++) Line 3360  for (;; ptr++)
3360    int subfirstbyte;    int subfirstbyte;
3361    int terminator;    int terminator;
3362    int mclength;    int mclength;
3363    uschar mcbuffer[8];    int tempbracount;
3364      pcre_uchar mcbuffer[8];
3365    
3366    /* Get next byte in the pattern */    /* Get next byte in the pattern */
3367    
# Line 3141  for (;; ptr++) Line 3408  for (;; ptr++)
3408        }        }
3409    
3410      *lengthptr += (int)(code - last_code);      *lengthptr += (int)(code - last_code);
3411      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code - last_code),
3412          c));
3413    
3414      /* If "previous" is set and it is not at the start of the work space, move      /* If "previous" is set and it is not at the start of the work space, move
3415      it back to there, in order to avoid filling up the work space. Otherwise,      it back to there, in order to avoid filling up the work space. Otherwise,
# Line 3215  for (;; ptr++) Line 3483  for (;; ptr++)
3483      previous_callout = NULL;      previous_callout = NULL;
3484      }      }
3485    
3486    /* In extended mode, skip white space and comments */    /* In extended mode, skip white space and comments. */
3487    
3488    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
3489      {      {
# Line 3349  for (;; ptr++) Line 3617  for (;; ptr++)
3617          {          {
3618          if (ptr[1] == CHAR_E)          if (ptr[1] == CHAR_E)
3619            ptr++;            ptr++;
3620          else if (strncmp((const char *)ptr+1,          else if (STRNCMP_UC_C8(ptr + 1,
3621                            STR_Q STR_BACKSLASH STR_E, 3) == 0)                            STR_Q STR_BACKSLASH STR_E, 3) == 0)
3622            ptr += 3;            ptr += 3;
3623          else          else
# Line 3392  for (;; ptr++) Line 3660  for (;; ptr++)
3660      than 256), because in that case the compiled code doesn't use the bit map.      than 256), because in that case the compiled code doesn't use the bit map.
3661      */      */
3662    
3663      memset(classbits, 0, 32 * sizeof(uschar));      memset(classbits, 0, 32 * sizeof(pcre_uint8));
3664    
3665  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3666      class_utf8 = FALSE;                       /* No chars >= 256 */      class_utf8 = FALSE;                       /* No chars >= 256 */
# Line 3406  for (;; ptr++) Line 3674  for (;; ptr++)
3674    
3675      if (c != 0) do      if (c != 0) do
3676        {        {
3677        const uschar *oldptr;        const pcre_uchar *oldptr;
3678    
3679  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3680        if (utf8 && c > 127)        if (utf8 && c > 127)
# Line 3452  for (;; ptr++) Line 3720  for (;; ptr++)
3720          {          {
3721          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3722          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3723          register const uschar *cbits = cd->cbits;          register const pcre_uint8 *cbits = cd->cbits;
3724          uschar pbits[32];          pcre_uint8 pbits[32];
3725    
3726          if (ptr[1] != CHAR_COLON)          if (ptr[1] != CHAR_COLON)
3727            {            {
# Line 3508  for (;; ptr++) Line 3776  for (;; ptr++)
3776          /* Copy in the first table (always present) */          /* Copy in the first table (always present) */
3777    
3778          memcpy(pbits, cbits + posix_class_maps[posix_class],          memcpy(pbits, cbits + posix_class_maps[posix_class],
3779            32 * sizeof(uschar));            32 * sizeof(pcre_uint8));
3780    
3781          /* If there is a second table, add or remove it as required. */          /* If there is a second table, add or remove it as required. */
3782    
# Line 3570  for (;; ptr++) Line 3838  for (;; ptr++)
3838    
3839          if (c < 0)          if (c < 0)
3840            {            {
3841            register const uschar *cbits = cd->cbits;            register const pcre_uint8 *cbits = cd->cbits;
3842            class_charcount += 2;     /* Greater than 1 is what matters */            class_charcount += 2;     /* Greater than 1 is what matters */
3843    
3844            switch (-c)            switch (-c)
# Line 4184  for (;; ptr++) Line 4452  for (;; ptr++)
4452      op_type = 0;                    /* Default single-char op codes */      op_type = 0;                    /* Default single-char op codes */
4453      possessive_quantifier = FALSE;  /* Default not possessive quantifier */      possessive_quantifier = FALSE;  /* Default not possessive quantifier */
4454    
4455      /* Save start of previous item, in case we have to move it up to make space      /* Save start of previous item, in case we have to move it up in order to
4456      for an inserted OP_ONCE for the additional '+' extension. */      insert something before it. */
4457    
4458      tempcode = previous;      tempcode = previous;
4459    
# Line 4208  for (;; ptr++) Line 4476  for (;; ptr++)
4476        }        }
4477      else repeat_type = greedy_default;      else repeat_type = greedy_default;
4478    
4479        /* If previous was a recursion call, wrap it in atomic brackets so that
4480        previous becomes the atomic group. All recursions were so wrapped in the
4481        past, but it no longer happens for non-repeated recursions. In fact, the
4482        repeated ones could be re-implemented independently so as not to need this,
4483        but for the moment we rely on the code for repeating groups. */
4484    
4485        if (*previous == OP_RECURSE)
4486          {
4487          memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
4488          *previous = OP_ONCE;
4489          PUT(previous, 1, 2 + 2*LINK_SIZE);
4490          previous[2 + 2*LINK_SIZE] = OP_KET;
4491          PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
4492          code += 2 + 2 * LINK_SIZE;
4493          length_prevgroup = 3 + 3*LINK_SIZE;
4494    
4495          /* When actually compiling, we need to check whether this was a forward
4496          reference, and if so, adjust the offset. */
4497    
4498          if (lengthptr == NULL && cd->hwm >= cd->start_workspace + LINK_SIZE)
4499            {
4500            int offset = GET(cd->hwm, -LINK_SIZE);
4501            if (offset == previous + 1 - cd->start_code)
4502              PUT(cd->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE);
4503            }
4504          }
4505    
4506        /* Now handle repetition for the different types of item. */
4507    
4508      /* If previous was a character match, abolish the item and generate a      /* If previous was a character match, abolish the item and generate a
4509      repeat item instead. If a char item has a minimum of more than one, ensure      repeat item instead. If a char item has a minimum of more than one, ensure
4510      that it is set in reqbyte - it might not be if a sequence such as x{3} is      that it is set in reqbyte - it might not be if a sequence such as x{3} is
# Line 4226  for (;; ptr++) Line 4523  for (;; ptr++)
4523  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4524        if (utf8 && (code[-1] & 0x80) != 0)        if (utf8 && (code[-1] & 0x80) != 0)
4525          {          {
4526          uschar *lastchar = code - 1;          pcre_uchar *lastchar = code - 1;
4527          while((*lastchar & 0xc0) == 0x80) lastchar--;          while((*lastchar & 0xc0) == 0x80) lastchar--;
4528          c = code - lastchar;            /* Length of UTF-8 character */          c = code - lastchar;            /* Length of UTF-8 character */
4529          memcpy(utf8_char, lastchar, c); /* Save the char */          memcpy(utf8_char, lastchar, c); /* Save the char */
# Line 4288  for (;; ptr++) Line 4585  for (;; ptr++)
4585    
4586      else if (*previous < OP_EODN)      else if (*previous < OP_EODN)
4587        {        {
4588        uschar *oldcode;        pcre_uchar *oldcode;
4589        int prop_type, prop_value;        int prop_type, prop_value;
4590        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
4591        c = *previous;        c = *previous;
# Line 4499  for (;; ptr++) Line 4796  for (;; ptr++)
4796        }        }
4797    
4798      /* If previous was a bracket group, we may have to replicate it in certain      /* If previous was a bracket group, we may have to replicate it in certain
4799      cases. Note that at this point we can encounter only the "basic" BRA and      cases. Note that at this point we can encounter only the "basic" bracket
4800      KET opcodes, as this is the place where they get converted into the more      opcodes such as BRA and CBRA, as this is the place where they get converted
4801      special varieties. */      into the more special varieties such as BRAPOS and SBRA. A test for >=
4802        OP_ASSERT and <= OP_COND includes ASSERT, ASSERT_NOT, ASSERTBACK,
4803        ASSERTBACK_NOT, ONCE, BRA, CBRA, and COND. Originally, PCRE did not allow
4804        repetition of assertions, but now it does, for Perl compatibility. */
4805    
4806      else if (*previous == OP_BRA  || *previous == OP_CBRA ||      else if (*previous >= OP_ASSERT && *previous <= OP_COND)
              *previous == OP_ONCE || *previous == OP_COND)  
4807        {        {
4808        register int i;        register int i;
4809        int len = (int)(code - previous);        int len = (int)(code - previous);
4810        uschar *bralink = NULL;        pcre_uchar *bralink = NULL;
4811        uschar *brazeroptr = NULL;        pcre_uchar *brazeroptr = NULL;
4812    
4813        /* Repeating a DEFINE group is pointless */        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
4814          we just ignore the repeat. */
4815    
4816        if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)        if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
4817            goto END_REPEAT;
4818    
4819          /* There is no sense in actually repeating assertions. The only potential
4820          use of repetition is in cases when the assertion is optional. Therefore,
4821          if the minimum is greater than zero, just ignore the repeat. If the
4822          maximum is not zero or one, set it to 1. */
4823    
4824          if (*previous < OP_ONCE)    /* Assertion */
4825          {          {
4826          *errorcodeptr = ERR55;          if (repeat_min > 0) goto END_REPEAT;
4827          goto FAILED;          if (repeat_max < 0 || repeat_max > 1) repeat_max = 1;
4828          }          }
4829    
4830        /* The case of a zero minimum is special because of the need to stick        /* The case of a zero minimum is special because of the need to stick
# Line 4537  for (;; ptr++) Line 4845  for (;; ptr++)
4845          **   goto END_REPEAT;          **   goto END_REPEAT;
4846          **   }          **   }
4847    
4848          However, that fails when a group is referenced as a subroutine from          However, that fails when a group or a subgroup within it is referenced
4849          elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it          as a subroutine from elsewhere in the pattern, so now we stick in
4850          so that it is skipped on execution. As we don't have a list of which          OP_SKIPZERO in front of it so that it is skipped on execution. As we
4851          groups are referenced, we cannot do this selectively.          don't have a list of which groups are referenced, we cannot do this
4852            selectively.
4853    
4854          If the maximum is 1 or unlimited, we just have to stick in the BRAZERO          If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
4855          and do no more at this point. However, we do need to adjust any          and do no more at this point. However, we do need to adjust any
# Line 4630  for (;; ptr++) Line 4939  for (;; ptr++)
4939              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
4940              for (i = 1; i < repeat_min; i++)              for (i = 1; i < repeat_min; i++)
4941                {                {
4942                uschar *hc;                pcre_uchar *hc;
4943                uschar *this_hwm = cd->hwm;                pcre_uchar *this_hwm = cd->hwm;
4944                memcpy(code, previous, len);                memcpy(code, previous, len);
4945                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
4946                  {                  {
# Line 4682  for (;; ptr++) Line 4991  for (;; ptr++)
4991    
4992          else for (i = repeat_max - 1; i >= 0; i--)          else for (i = repeat_max - 1; i >= 0; i--)
4993            {            {
4994            uschar *hc;            pcre_uchar *hc;
4995            uschar *this_hwm = cd->hwm;            pcre_uchar *this_hwm = cd->hwm;
4996    
4997            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
4998    
# Line 4716  for (;; ptr++) Line 5025  for (;; ptr++)
5025            {            {
5026            int oldlinkoffset;            int oldlinkoffset;
5027            int offset = (int)(code - bralink + 1);            int offset = (int)(code - bralink + 1);
5028            uschar *bra = code - offset;            pcre_uchar *bra = code - offset;
5029            oldlinkoffset = GET(bra, 1);            oldlinkoffset = GET(bra, 1);
5030            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
5031            *code++ = OP_KET;            *code++ = OP_KET;
# Line 4726  for (;; ptr++) Line 5035  for (;; ptr++)
5035          }          }
5036    
5037        /* If the maximum is unlimited, set a repeater in the final copy. For        /* If the maximum is unlimited, set a repeater in the final copy. For
5038        ONCE brackets, that's all we need to do.        ONCE brackets, that's all we need to do. However, possessively repeated
5039          ONCE brackets can be converted into non-capturing brackets, as the
5040        Otherwise, if the quantifier was possessive, we convert the BRA code to        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
5041        the POS form, and the KET code to KETRPOS. (It turns out to be convenient        deal with possessive ONCEs specially.
5042        at runtime to detect this kind of subpattern at both the start and at the  
5043        end.) The use of special opcodes makes it possible to reduce greatly the        Otherwise, when we are doing the actual compile phase, check to see
5044        stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO,        whether this group is one that could match an empty string. If so,
5045        convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that        convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
5046        the default action below, of wrapping everything inside atomic brackets,        that runtime checking can be done. [This check is also applied to ONCE
5047        does not happen.        groups at runtime, but in a different way.]
5048    
5049        Then, when we are doing the actual compile phase, check to see whether        Then, if the quantifier was possessive and the bracket is not a
5050        this group is one that could match an empty string. If so, convert the        conditional, we convert the BRA code to the POS form, and the KET code to
5051        initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime        KETRPOS. (It turns out to be convenient at runtime to detect this kind of
5052        checking can be done. [This check is also applied to ONCE groups at        subpattern at both the start and at the end.) The use of special opcodes
5053        runtime, but in a different way.] */        makes it possible to reduce greatly the stack usage in pcre_exec(). If
5054          the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
5055    
5056          Then, if the minimum number of matches is 1 or 0, cancel the possessive
5057          flag so that the default action below, of wrapping everything inside
5058          atomic brackets, does not happen. When the minimum is greater than 1,
5059          there will be earlier copies of the group, and so we still have to wrap
5060          the whole thing. */
5061    
5062        else        else
5063          {          {
5064          uschar *ketcode = code - 1 - LINK_SIZE;          pcre_uchar *ketcode = code - 1 - LINK_SIZE;
5065          uschar *bracode = ketcode - GET(ketcode, 1);          pcre_uchar *bracode = ketcode - GET(ketcode, 1);
5066    
5067          if (*bracode == OP_ONCE)          /* Convert possessive ONCE brackets to non-capturing */
5068    
5069            if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
5070                possessive_quantifier) *bracode = OP_BRA;
5071    
5072            /* For non-possessive ONCE brackets, all we need to do is to
5073            set the KET. */
5074    
5075            if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
5076            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
5077    
5078            /* Handle non-ONCE brackets and possessive ONCEs (which have been
5079            converted to non-capturing above). */
5080    
5081          else          else
5082            {            {
5083            if (possessive_quantifier)            /* In the compile phase, check for empty string matching. */
5084              {  
             *bracode += 1;                   /* Switch to xxxPOS opcodes */  
             *ketcode = OP_KETRPOS;  
             if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;  
             possessive_quantifier = FALSE;  
             }  
           else *ketcode = OP_KETRMAX + repeat_type;  
   
5085            if (lengthptr == NULL)            if (lengthptr == NULL)
5086              {              {
5087              uschar *scode = bracode;              pcre_uchar *scode = bracode;
5088              do              do
5089                {                {
5090                if (could_be_empty_branch(scode, ketcode, utf8, cd))                if (could_be_empty_branch(scode, ketcode, utf8, cd))
# Line 4775  for (;; ptr++) Line 5096  for (;; ptr++)
5096                }                }
5097              while (*scode == OP_ALT);              while (*scode == OP_ALT);
5098              }              }
5099    
5100              /* Handle possessive quantifiers. */
5101    
5102              if (possessive_quantifier)
5103                {
5104                /* For COND brackets, we wrap the whole thing in a possessively
5105                repeated non-capturing bracket, because we have not invented POS
5106                versions of the COND opcodes. Because we are moving code along, we
5107                must ensure that any pending recursive references are updated. */
5108    
5109                if (*bracode == OP_COND || *bracode == OP_SCOND)
5110                  {
5111                  int nlen = (int)(code - bracode);
5112                  *code = OP_END;
5113                  adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5114                  memmove(bracode + 1+LINK_SIZE, bracode, nlen);
5115                  code += 1 + LINK_SIZE;
5116                  nlen += 1 + LINK_SIZE;
5117                  *bracode = OP_BRAPOS;
5118                  *code++ = OP_KETRPOS;
5119                  PUTINC(code, 0, nlen);
5120                  PUT(bracode, 1, nlen);
5121                  }
5122    
5123                /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
5124    
5125                else
5126                  {
5127                  *bracode += 1;              /* Switch to xxxPOS opcodes */
5128                  *ketcode = OP_KETRPOS;
5129                  }
5130    
5131                /* If the minimum is zero, mark it as possessive, then unset the
5132                possessive flag when the minimum is 0 or 1. */
5133    
5134                if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
5135                if (repeat_min < 2) possessive_quantifier = FALSE;
5136                }
5137    
5138              /* Non-possessive quantifier */
5139    
5140              else *ketcode = OP_KETRMAX + repeat_type;
5141            }            }
5142          }          }
5143        }        }
# Line 4799  for (;; ptr++) Line 5162  for (;; ptr++)
5162      there are special alternative opcodes for this case. For anything else, we      there are special alternative opcodes for this case. For anything else, we
5163      wrap the entire repeated item inside OP_ONCE brackets. Logically, the '+'      wrap the entire repeated item inside OP_ONCE brackets. Logically, the '+'
5164      notation is just syntactic sugar, taken from Sun's Java package, but the      notation is just syntactic sugar, taken from Sun's Java package, but the
5165      special opcodes can optimize it.      special opcodes can optimize it.
5166    
5167      Possessively repeated subpatterns have already been handled in the code      Some (but not all) possessively repeated subpatterns have already been
5168      just above, so possessive_quantifier is always FALSE for them at this      completely handled in the code just above. For them, possessive_quantifier
5169      stage.      is always FALSE at this stage.
5170    
5171      Note that the repeated item starts at tempcode, not at previous, which      Note that the repeated item starts at tempcode, not at previous, which
5172      might be the first part of a string whose (former) last char we repeated.      might be the first part of a string whose (former) last char we repeated.
5173    
# Line 4904  for (;; ptr++) Line 5267  for (;; ptr++)
5267        int i, namelen;        int i, namelen;
5268        int arglen = 0;        int arglen = 0;
5269        const char *vn = verbnames;        const char *vn = verbnames;
5270        const uschar *name = ptr + 1;        const pcre_uchar *name = ptr + 1;
5271        const uschar *arg = NULL;        const pcre_uchar *arg = NULL;
5272        previous = NULL;        previous = NULL;
5273        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
5274        namelen = (int)(ptr - name);        namelen = (int)(ptr - name);
5275    
5276          /* It appears that Perl allows any characters whatsoever, other than
5277          a closing parenthesis, to appear in arguments, so we no longer insist on
5278          letters, digits, and underscores. */
5279    
5280        if (*ptr == CHAR_COLON)        if (*ptr == CHAR_COLON)
5281          {          {
5282          arg = ++ptr;          arg = ++ptr;
5283          while ((cd->ctypes[*ptr] & (ctype_letter|ctype_digit)) != 0          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
           || *ptr == '_') ptr++;  
5284          arglen = (int)(ptr - arg);          arglen = (int)(ptr - arg);
5285          }          }
5286    
# Line 4929  for (;; ptr++) Line 5295  for (;; ptr++)
5295        for (i = 0; i < verbcount; i++)        for (i = 0; i < verbcount; i++)
5296          {          {
5297          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
5298              strncmp((char *)name, vn, namelen) == 0)              STRNCMP_UC_C8(name, vn, namelen) == 0)
5299            {            {
5300            /* Check for open captures before ACCEPT and convert it to            /* Check for open captures before ACCEPT and convert it to
5301            ASSERT_ACCEPT if in an assertion. */            ASSERT_ACCEPT if in an assertion. */
5302    
5303            if (verbs[i].op == OP_ACCEPT)            if (verbs[i].op == OP_ACCEPT)
# Line 4941  for (;; ptr++) Line 5307  for (;; ptr++)
5307                {                {
5308                *errorcodeptr = ERR59;                *errorcodeptr = ERR59;
5309                goto FAILED;                goto FAILED;
5310                }                }
5311              cd->had_accept = TRUE;              cd->had_accept = TRUE;
5312              for (oc = cd->open_caps; oc != NULL; oc = oc->next)              for (oc = cd->open_caps; oc != NULL; oc = oc->next)
5313                {                {
# Line 4949  for (;; ptr++) Line 5315  for (;; ptr++)
5315                PUT2INC(code, 0, oc->number);                PUT2INC(code, 0, oc->number);
5316                }                }
5317              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
5318    
5319                /* Do not set firstbyte after *ACCEPT */
5320                if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
5321              }              }
5322    
5323            /* Handle other cases with/without an argument */            /* Handle other cases with/without an argument */
# Line 4961  for (;; ptr++) Line 5330  for (;; ptr++)
5330                goto FAILED;                goto FAILED;
5331                }                }
5332              *code = verbs[i].op;              *code = verbs[i].op;
5333              if (*code++ == OP_THEN)              if (*code++ == OP_THEN) cd->external_flags |= PCRE_HASTHEN;
               {  
               PUT(code, 0, code - bcptr->current_branch - 1);  
               code += LINK_SIZE;  
               }  
5334              }              }
5335    
5336            else            else
# Line 4976  for (;; ptr++) Line 5341  for (;; ptr++)
5341                goto FAILED;                goto FAILED;
5342                }                }
5343              *code = verbs[i].op_arg;              *code = verbs[i].op_arg;
5344              if (*code++ == OP_THEN_ARG)              if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;
               {  
               PUT(code, 0, code - bcptr->current_branch - 1);  
               code += LINK_SIZE;  
               }  
5345              *code++ = arglen;              *code++ = arglen;
5346              memcpy(code, arg, arglen);              memcpy(code, arg, arglen);
5347              code += arglen;              code += arglen;
# Line 5005  for (;; ptr++) Line 5366  for (;; ptr++)
5366        {        {
5367        int i, set, unset, namelen;        int i, set, unset, namelen;
5368        int *optset;        int *optset;
5369        const uschar *name;        const pcre_uchar *name;
5370        uschar *slot;        pcre_uchar *slot;
5371    
5372        switch (*(++ptr))        switch (*(++ptr))
5373          {          {
# Line 5158  for (;; ptr++) Line 5519  for (;; ptr++)
5519          slot = cd->name_table;          slot = cd->name_table;
5520          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
5521            {            {
5522            if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;            if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break;
5523            slot += cd->name_entry_size;            slot += cd->name_entry_size;
5524            }            }
5525    
# Line 5215  for (;; ptr++) Line 5576  for (;; ptr++)
5576          /* Similarly, check for the (?(DEFINE) "condition", which is always          /* Similarly, check for the (?(DEFINE) "condition", which is always
5577          false. */          false. */
5578    
5579          else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)          else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0)
5580            {            {
5581            code[1+LINK_SIZE] = OP_DEF;            code[1+LINK_SIZE] = OP_DEF;
5582            skipbytes = 1;            skipbytes = 1;
# Line 5242  for (;; ptr++) Line 5603  for (;; ptr++)
5603          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5604          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
5605          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
5606          cd->assert_depth += 1;          cd->assert_depth += 1;
5607          ptr++;          ptr++;
5608          break;          break;
5609    
# Line 5257  for (;; ptr++) Line 5618  for (;; ptr++)
5618            continue;            continue;
5619            }            }
5620          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
5621          cd->assert_depth += 1;          cd->assert_depth += 1;
5622          break;          break;
5623    
5624    
# Line 5267  for (;; ptr++) Line 5628  for (;; ptr++)
5628            {            {
5629            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
5630            bravalue = OP_ASSERTBACK;            bravalue = OP_ASSERTBACK;
5631            cd->assert_depth += 1;            cd->assert_depth += 1;
5632            ptr += 2;            ptr += 2;
5633            break;            break;
5634    
5635            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
5636            bravalue = OP_ASSERTBACK_NOT;            bravalue = OP_ASSERTBACK_NOT;
5637            cd->assert_depth += 1;            cd->assert_depth += 1;
5638            ptr += 2;            ptr += 2;
5639            break;            break;
5640    
# Line 5295  for (;; ptr++) Line 5656  for (;; ptr++)
5656    
5657          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5658          case CHAR_C:                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
5659          previous_callout = code;  /* Save for later completion */          previous_callout = code;     /* Save for later completion */
5660          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1;    /* Skip one item before completing */
5661          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
5662            {            {
5663            int n = 0;            int n = 0;
# Line 5432  for (;; ptr++) Line 5793  for (;; ptr++)
5793    
5794              if (!dupname)              if (!dupname)
5795                {                {
5796                uschar *cslot = cd->name_table;                pcre_uchar *cslot = cd->name_table;
5797                for (i = 0; i < cd->names_found; i++)                for (i = 0; i < cd->names_found; i++)
5798                  {                  {
5799                  if (cslot != slot)                  if (cslot != slot)
# Line 5488  for (;; ptr++) Line 5849  for (;; ptr++)
5849    
5850          if (lengthptr != NULL)          if (lengthptr != NULL)
5851            {            {
5852            const uschar *temp;            const pcre_uchar *temp;
5853    
5854            if (namelen == 0)            if (namelen == 0)
5855              {              {
# Line 5533  for (;; ptr++) Line 5894  for (;; ptr++)
5894            slot = cd->name_table;            slot = cd->name_table;
5895            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
5896              {              {
5897              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&              if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&
5898                  slot[2+namelen] == 0)                  slot[2+namelen] == 0)
5899                break;                break;
5900              slot += cd->name_entry_size;              slot += cd->name_entry_size;
# Line 5570  for (;; ptr++) Line 5931  for (;; ptr++)
5931          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
5932          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
5933            {            {
5934            const uschar *called;            const pcre_uchar *called;
5935            terminator = CHAR_RIGHT_PARENTHESIS;            terminator = CHAR_RIGHT_PARENTHESIS;
5936    
5937            /* Come here from the \g<...> and \g'...' code (Oniguruma            /* Come here from the \g<...> and \g'...' code (Oniguruma
# Line 5664  for (;; ptr++) Line 6025  for (;; ptr++)
6025    
6026                /* Fudge the value of "called" so that when it is inserted as an                /* Fudge the value of "called" so that when it is inserted as an
6027                offset below, what is actually inserted is the reference number                offset below, what is actually inserted is the reference number
6028                of the group. */                of the group. Then remember the forward reference. */
6029    
6030                called = cd->start_code + recno;                called = cd->start_code + recno;
6031                PUTINC(cd->hwm, 0, (int)(code + 2 + LINK_SIZE - cd->start_code));                PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code));
6032                }                }
6033    
6034              /* If not a forward reference, and the subpattern is still open,              /* If not a forward reference, and the subpattern is still open,
6035              this is a recursive call. We check to see if this is a left              this is a recursive call. We check to see if this is a left
6036              recursion that could loop for ever, and diagnose that case. */              recursion that could loop for ever, and diagnose that case. We
6037                must not, however, do this check if we are in a conditional
6038                subpattern because the condition might be testing for recursion in
6039                a pattern such as /(?(R)a+|(?R)b)/, which is perfectly valid.
6040                Forever loops are also detected at runtime, so those that occur in
6041                conditional subpatterns will be picked up then. */
6042    
6043              else if (GET(called, 1) == 0 &&              else if (GET(called, 1) == 0 && cond_depth <= 0 &&
6044                       could_be_empty(called, code, bcptr, utf8, cd))                       could_be_empty(called, code, bcptr, utf8, cd))
6045                {                {
6046                *errorcodeptr = ERR40;                *errorcodeptr = ERR40;
# Line 5682  for (;; ptr++) Line 6048  for (;; ptr++)
6048                }                }
6049              }              }
6050    
6051            /* Insert the recursion/subroutine item, automatically wrapped inside            /* Insert the recursion/subroutine item. */
           "once" brackets. Set up a "previous group" length so that a  
           subsequent quantifier will work. */  
   
           *code = OP_ONCE;  
           PUT(code, 1, 2 + 2*LINK_SIZE);  
           code += 1 + LINK_SIZE;  
6052    
6053            *code = OP_RECURSE;            *code = OP_RECURSE;
6054            PUT(code, 1, (int)(called - cd->start_code));            PUT(code, 1, (int)(called - cd->start_code));
6055            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
   
           *code = OP_KET;  
           PUT(code, 1, 2 + 2*LINK_SIZE);  
           code += 1 + LINK_SIZE;  
   
           length_prevgroup = 3 + 3*LINK_SIZE;  
6056            }            }
6057    
6058          /* Can't determine a first byte now */          /* Can't determine a first byte now */
# Line 5813  for (;; ptr++) Line 6167  for (;; ptr++)
6167        skipbytes = 2;        skipbytes = 2;
6168        }        }
6169    
6170      /* Process nested bracketed regex. Assertions may not be repeated, but      /* Process nested bracketed regex. Assertions used not to be repeatable,
6171      other kinds can be. All their opcodes are >= OP_ONCE. We copy code into a      but this was changed for Perl compatibility, so all kinds can now be
6172      non-register variable (tempcode) in order to be able to pass its address      repeated. We copy code into a non-register variable (tempcode) in order to
6173      because some compilers complain otherwise. */      be able to pass its address because some compilers complain otherwise. */
6174    
6175      previous = (bravalue >= OP_ONCE)? code : NULL;      previous = code;                      /* For handling repetition */
6176      *code = bravalue;      *code = bravalue;
6177      tempcode = code;      tempcode = code;
6178      tempreqvary = cd->req_varyopt;     /* Save value before bracket */      tempreqvary = cd->req_varyopt;        /* Save value before bracket */
6179      length_prevgroup = 0;              /* Initialize for pre-compile phase */      tempbracount = cd->bracount;          /* Save value before bracket */
6180        length_prevgroup = 0;                 /* Initialize for pre-compile phase */
6181    
6182      if (!compile_regex(      if (!compile_regex(
6183           newoptions,                   /* The complete new option state */           newoptions,                      /* The complete new option state */
6184           &tempcode,                    /* Where to put code (updated) */           &tempcode,                       /* Where to put code (updated) */
6185           &ptr,                         /* Input pointer (updated) */           &ptr,                            /* Input pointer (updated) */
6186           errorcodeptr,                 /* Where to put an error message */           errorcodeptr,                    /* Where to put an error message */
6187           (bravalue == OP_ASSERTBACK ||           (bravalue == OP_ASSERTBACK ||
6188            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
6189           reset_bracount,               /* True if (?| group */           reset_bracount,                  /* True if (?| group */
6190           skipbytes,                    /* Skip over bracket number */           skipbytes,                       /* Skip over bracket number */
6191           &subfirstbyte,                /* For possible first char */           cond_depth +
6192           &subreqbyte,                  /* For possible last char */             ((bravalue == OP_COND)?1:0),   /* Depth of condition subpatterns */
6193           bcptr,                        /* Current branch chain */           &subfirstbyte,                   /* For possible first char */
6194           cd,                           /* Tables block */           &subreqbyte,                     /* For possible last char */
6195           (lengthptr == NULL)? NULL :   /* Actual compile phase */           bcptr,                           /* Current branch chain */
6196             &length_prevgroup           /* Pre-compile phase */           cd,                              /* Tables block */
6197             (lengthptr == NULL)? NULL :      /* Actual compile phase */
6198               &length_prevgroup              /* Pre-compile phase */
6199           ))           ))
6200        goto FAILED;        goto FAILED;
6201    
6202        /* If this was an atomic group and there are no capturing groups within it,
6203        generate OP_ONCE_NC instead of OP_ONCE. */
6204    
6205        if (bravalue == OP_ONCE && cd->bracount <= tempbracount)
6206          *code = OP_ONCE_NC;
6207    
6208      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
6209        cd->assert_depth -= 1;        cd->assert_depth -= 1;
6210    
6211      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
6212      group, while tempcode has been updated to point past the end of the group      group, while tempcode has been updated to point past the end of the group.
6213      and any option resetting that may follow it. The pattern pointer (ptr)      The pattern pointer (ptr) is on the bracket.
     is on the bracket. */  
6214    
6215      /* If this is a conditional bracket, check that there are no more than      If this is a conditional bracket, check that there are no more than
6216      two branches in the group, or just one if it's a DEFINE group. We do this      two branches in the group, or just one if it's a DEFINE group. We do this
6217      in the real compile phase, not in the pre-pass, where the whole group may      in the real compile phase, not in the pre-pass, where the whole group may
6218      not be available. */      not be available. */
6219    
6220      if (bravalue == OP_COND && lengthptr == NULL)      if (bravalue == OP_COND && lengthptr == NULL)
6221        {        {
6222        uschar *tc = code;        pcre_uchar *tc = code;
6223        int condcount = 0;        int condcount = 0;
6224    
6225        do {        do {
# Line 6032  for (;; ptr++) Line 6394  for (;; ptr++)
6394    
6395        if (-c == ESC_g)        if (-c == ESC_g)
6396          {          {
6397          const uschar *p;          const pcre_uchar *p;
6398          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
6399          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
6400            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
# Line 6083  for (;; ptr++) Line 6445  for (;; ptr++)
6445          }          }
6446    
6447        /* \k<name> or \k'name' is a back reference by name (Perl syntax).        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
6448        We also support \k{name} (.NET syntax) */        We also support \k{name} (.NET syntax).  */
6449    
6450        if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||        if (-c == ESC_k)
           ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))  
6451          {          {
6452            if ((ptr[1] != CHAR_LESS_THAN_SIGN &&
6453              ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
6454              {
6455              *errorcodeptr = ERR69;
6456              break;
6457              }
6458          is_recurse = FALSE;          is_recurse = FALSE;
6459          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
6460            CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?            CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
# Line 6167  for (;; ptr++) Line 6534  for (;; ptr++)
6534            }            }
6535          else          else
6536  #endif  #endif
6537            {          /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE
6538            so that it works in DFA mode and in lookbehinds. */
6539    
6540              {
6541            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
6542            *code++ = -c;            *code++ = (!utf8 && c == -ESC_C)? OP_ALLANY : -c;
6543            }            }
6544          }          }
6545        continue;        continue;
# Line 6244  for (;; ptr++) Line 6614  for (;; ptr++)
6614        else firstbyte = reqbyte = REQ_NONE;        else firstbyte = reqbyte = REQ_NONE;
6615        }        }
6616    
6617      /* firstbyte was previously set; we can set reqbyte only the length is      /* firstbyte was previously set; we can set reqbyte only if the length is
6618      1 or the matching is caseful. */      1 or the matching is caseful. */
6619    
6620      else      else
# Line 6291  Arguments: Line 6661  Arguments:
6661    lookbehind     TRUE if this is a lookbehind assertion    lookbehind     TRUE if this is a lookbehind assertion
6662    reset_bracount TRUE to reset the count for each branch    reset_bracount TRUE to reset the count for each branch
6663    skipbytes      skip this many bytes at start (for brackets and OP_COND)    skipbytes      skip this many bytes at start (for brackets and OP_COND)
6664      cond_depth     depth of nesting for conditional subpatterns
6665    firstbyteptr   place to put the first required character, or a negative number    firstbyteptr   place to put the first required character, or a negative number
6666    reqbyteptr     place to put the last required character, or a negative number    reqbyteptr     place to put the last required character, or a negative number
6667    bcptr          pointer to the chain of currently open branches    bcptr          pointer to the chain of currently open branches
# Line 6302  Returns:         TRUE on success Line 6673  Returns:         TRUE on success
6673  */  */
6674    
6675  static BOOL  static BOOL
6676  compile_regex(int options, uschar **codeptr, const uschar **ptrptr,  compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
6677    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
6678    int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd,    int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
6679    int *lengthptr)    compile_data *cd, int *lengthptr)
6680  {  {
6681  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
6682  uschar *code = *codeptr;  pcre_uchar *code = *codeptr;
6683  uschar *last_branch = code;  pcre_uchar *last_branch = code;
6684  uschar *start_bracket = code;  pcre_uchar *start_bracket = code;
6685  uschar *reverse_count = NULL;  pcre_uchar *reverse_count = NULL;
6686  open_capitem capitem;  open_capitem capitem;
6687  int capnumber = 0;  int capnumber = 0;
6688  int firstbyte, reqbyte;  int firstbyte, reqbyte;
# Line 6342  pre-compile phase to find out whether an Line 6713  pre-compile phase to find out whether an
6713    
6714  /* If this is a capturing subpattern, add to the chain of open capturing items  /* If this is a capturing subpattern, add to the chain of open capturing items
6715  so that we can detect them if (*ACCEPT) is encountered. This is also used to  so that we can detect them if (*ACCEPT) is encountered. This is also used to
6716  detect groups that contain recursive back references to themselves. Note that  detect groups that contain recursive back references to themselves. Note that
6717  only OP_CBRA need be tested here; changing this opcode to one of its variants,  only OP_CBRA need be tested here; changing this opcode to one of its variants,
6718  e.g. OP_SCBRAPOS, happens later, after the group has been compiled. */  e.g. OP_SCBRAPOS, happens later, after the group has been compiled. */
6719    
6720  if (*code == OP_CBRA)  if (*code == OP_CBRA)
# Line 6384  for (;;) Line 6755  for (;;)
6755    into the length. */    into the length. */
6756    
6757    if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,    if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,
6758          &branchreqbyte, &bc, cd, (lengthptr == NULL)? NULL : &length))          &branchreqbyte, &bc, cond_depth, cd,
6759            (lengthptr == NULL)? NULL : &length))
6760      {      {
6761      *ptrptr = ptr;      *ptrptr = ptr;
6762      return FALSE;      return FALSE;
# Line 6459  for (;;) Line 6831  for (;;)
6831          }          }
6832        else if (fixed_length < 0)        else if (fixed_length < 0)
6833          {          {
6834          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;          *errorcodeptr = (fixed_length == -2)? ERR36 :
6835                            (fixed_length == -4)? ERR70: ERR25;
6836          *ptrptr = ptr;          *ptrptr = ptr;
6837          return FALSE;          return FALSE;
6838          }          }
# Line 6606  Returns:     TRUE or FALSE Line 6979  Returns:     TRUE or FALSE
6979  */  */
6980    
6981  static BOOL  static BOOL
6982  is_anchored(register const uschar *code, unsigned int bracket_map,  is_anchored(register const pcre_uchar *code, unsigned int bracket_map,
6983    unsigned int backref_map)    unsigned int backref_map)
6984  {  {
6985  do {  do {
6986     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
6987       FALSE);       code + _pcre_OP_lengths[*code], FALSE);
6988     register int op = *scode;     register int op = *scode;
6989    
6990     /* Non-capturing brackets */     /* Non-capturing brackets */
# Line 6634  do { Line 7007  do {
7007    
7008     /* Other brackets */     /* Other brackets */
7009    
7010     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC ||
7011                op == OP_COND)
7012       {       {
7013       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;
7014       }       }
# Line 6682  Returns:         TRUE or FALSE Line 7056  Returns:         TRUE or FALSE
7056  */  */
7057    
7058  static BOOL  static BOOL
7059  is_startline(const uschar *code, unsigned int bracket_map,  is_startline(const pcre_uchar *code, unsigned int bracket_map,
7060    unsigned int backref_map)    unsigned int backref_map)
7061  {  {
7062  do {  do {
7063     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
7064       FALSE);       code + _pcre_OP_lengths[*code], FALSE);
7065     register int op = *scode;     register int op = *scode;
7066    
7067     /* If we are at the start of a conditional assertion group, *both* the     /* If we are at the start of a conditional assertion group, *both* the
# Line 6738  do { Line 7112  do {
7112    
7113     /* Other brackets */     /* Other brackets */
7114    
7115     else if (op == OP_ASSERT || op == OP_ONCE)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC)
7116       {       {
7117       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
7118       }       }
# Line 6785  Returns:     -1 or the fixed first char Line 7159  Returns:     -1 or the fixed first char
7159  */  */
7160    
7161  static int  static int
7162  find_firstassertedchar(const uschar *code, BOOL inassert)  find_firstassertedchar(const pcre_uchar *code, BOOL inassert)
7163  {  {
7164  register int c = -1;  register int c = -1;
7165  do {  do {
7166     int d;     int d;
7167     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
7168               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0;               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0;
7169     const uschar *scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);     const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
7170         TRUE);
7171     register int op = *scode;     register int op = *scode;
7172    
7173     switch(op)     switch(op)
# Line 6808  do { Line 7183  do {
7183       case OP_SCBRAPOS:       case OP_SCBRAPOS:
7184       case OP_ASSERT:       case OP_ASSERT:
7185       case OP_ONCE:       case OP_ONCE:
7186         case OP_ONCE_NC:
7187       case OP_COND:       case OP_COND:
7188       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)
7189         return -1;         return -1;
# Line 6817  do { Line 7193  do {
7193       case OP_EXACT:       case OP_EXACT:
7194       scode += 2;       scode += 2;
7195       /* Fall through */       /* Fall through */
7196    
7197       case OP_CHAR:       case OP_CHAR:
7198       case OP_PLUS:       case OP_PLUS:
7199       case OP_MINPLUS:       case OP_MINPLUS:
# Line 6830  do { Line 7206  do {
7206       case OP_EXACTI:       case OP_EXACTI:
7207       scode += 2;       scode += 2;
7208       /* Fall through */       /* Fall through */
7209    
7210       case OP_CHARI:       case OP_CHARI:
7211       case OP_PLUSI:       case OP_PLUSI:
7212       case OP_MINPLUSI:       case OP_MINPLUSI:
# Line 6871  Returns:        pointer to compiled data Line 7247  Returns:        pointer to compiled data
7247                  with errorptr and erroroffset set                  with errorptr and erroroffset set
7248  */  */
7249    
7250    #ifndef COMPILE_PCRE16
7251  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7252  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
7253    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
7254    #else
7255    PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7256    pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
7257      int *erroroffset, const unsigned char *tables)
7258    #endif
7259  {  {
7260    #ifndef COMPILE_PCRE16
7261  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7262    #else
7263    return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7264    #endif
7265  }  }
7266    
7267    
7268    #ifndef COMPILE_PCRE16
7269  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7270  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
7271    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
7272    #else
7273    PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7274    pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
7275      const char **errorptr, int *erroroffset, const unsigned char *tables)
7276    #endif
7277  {  {
7278  real_pcre *re;  real_pcre *re;
7279  int length = 1;  /* For final END opcode */  int length = 1;  /* For final END opcode */
# Line 6890  int errorcode = 0; Line 7282  int errorcode = 0;
7282  int skipatstart = 0;  int skipatstart = 0;
7283  BOOL utf8;  BOOL utf8;
7284  size_t size;  size_t size;
7285  uschar *code;  pcre_uchar *code;
7286  const uschar *codestart;  const pcre_uchar *codestart;
7287  const uschar *ptr;  const pcre_uchar *ptr;
7288  compile_data compile_block;  compile_data compile_block;
7289  compile_data *cd = &compile_block;  compile_data *cd = &compile_block;
7290    
# Line 6902  as soon as possible, so that a fairly la Line 7294  as soon as possible, so that a fairly la
7294  this purpose. The same space is used in the second phase for remembering where  this purpose. The same space is used in the second phase for remembering where
7295  to fill in forward references to subpatterns. */  to fill in forward references to subpatterns. */
7296    
7297  uschar cworkspace[COMPILE_WORK_SIZE];  pcre_uchar cworkspace[COMPILE_WORK_SIZE];
7298    
7299  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
7300    
7301  ptr = (const uschar *)pattern;  ptr = (const pcre_uchar *)pattern;
7302    
7303  /* We can't pass back an error message if errorptr is NULL; I guess the best we  /* We can't pass back an error message if errorptr is NULL; I guess the best we
7304  can do is just return NULL, but we can set a code value if there is a code  can do is just return NULL, but we can set a code value if there is a code
# Line 6956  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7348  while (ptr[skipatstart] == CHAR_LEFT_PAR
7348    int newnl = 0;    int newnl = 0;
7349    int newbsr = 0;    int newbsr = 0;
7350    
7351    if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)
7352      { skipatstart += 7; options |= PCRE_UTF8; continue; }      { skipatstart += 7; options |= PCRE_UTF8; continue; }
7353    else if (strncmp((char *)(ptr+skipatstart+2), STRING_UCP_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
7354      { skipatstart += 6; options |= PCRE_UCP; continue; }      { skipatstart += 6; options |= PCRE_UCP; continue; }
7355    else if (strncmp((char *)(ptr+skipatstart+2), STRING_NO_START_OPT_RIGHTPAR, 13) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
7356      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
7357    
7358    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
7359      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
7360    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)
7361      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
7362    else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5)  == 0)
7363      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
7364    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0)
7365      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
7366    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0)
7367      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
7368    
7369    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
7370      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
7371    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
7372      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
7373    
7374    if (newnl != 0)    if (newnl != 0)
# Line 6990  utf8 = (options & PCRE_UTF8) != 0; Line 7382  utf8 = (options & PCRE_UTF8) != 0;
7382    
7383  /* Can't support UTF8 unless PCRE has been compiled to include the code. The  /* Can't support UTF8 unless PCRE has been compiled to include the code. The
7384  return of an error code from _pcre_valid_utf8() is a new feature, introduced in  return of an error code from _pcre_valid_utf8() is a new feature, introduced in
7385  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
7386  not used here. */  not used here. */
7387    
7388  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
7389  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7390       (errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0)       (errorcode = _pcre_valid_utf8((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
7391    {    {
7392    errorcode = ERR44;    errorcode = ERR44;
7393    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
# Line 7020  if ((options & PCRE_UCP) != 0) Line 7412  if ((options & PCRE_UCP) != 0)
7412    
7413  /* Check validity of \R options. */  /* Check validity of \R options. */
7414    
7415  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))  if ((options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) ==
7416         (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
7417    {    {
7418    case 0:    errorcode = ERR56;
7419    case PCRE_BSR_ANYCRLF:    goto PCRE_EARLY_ERROR_RETURN;
   case PCRE_BSR_UNICODE:  
   break;  
   default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;  
7420    }    }
7421    
7422  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The
# Line 7095  cd->name_table = NULL; Line 7485  cd->name_table = NULL;
7485  cd->start_workspace = cworkspace;  cd->start_workspace = cworkspace;
7486  cd->start_code = cworkspace;  cd->start_code = cworkspace;
7487  cd->hwm = cworkspace;  cd->hwm = cworkspace;
7488  cd->start_pattern = (const uschar *)pattern;  cd->start_pattern = (const pcre_uchar *)pattern;
7489  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC(pattern));
7490  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7491  cd->external_options = options;  cd->external_options = options;
7492  cd->external_flags = 0;  cd->external_flags = 0;
# Line 7111  outside can help speed up starting point Line 7501  outside can help speed up starting point
7501  ptr += skipatstart;  ptr += skipatstart;
7502  code = cworkspace;  code = cworkspace;
7503  *code = OP_BRA;  *code = OP_BRA;
7504  (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,  (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
7505    FALSE, 0, &firstbyte, &reqbyte, NULL, cd, &length);    FALSE, 0, 0, &firstbyte, &reqbyte, NULL, cd, &length);
7506  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
7507    
7508  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
# Line 7169  cd->final_bracount = cd->bracount;  /* S Line 7559  cd->final_bracount = cd->bracount;  /* S
7559  cd->assert_depth = 0;  cd->assert_depth = 0;
7560  cd->bracount = 0;  cd->bracount = 0;
7561  cd->names_found = 0;  cd->names_found = 0;
7562  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
7563  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
7564  cd->start_code = codestart;  cd->start_code = codestart;
7565  cd->hwm = cworkspace;  cd->hwm = cworkspace;
# Line 7182  cd->open_caps = NULL; Line 7572  cd->open_caps = NULL;
7572  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
7573  of the function here. */  of the function here. */
7574    
7575  ptr = (const uschar *)pattern + skipatstart;  ptr = (const pcre_uchar *)pattern + skipatstart;
7576  code = (uschar *)codestart;  code = (pcre_uchar *)codestart;
7577  *code = OP_BRA;  *code = OP_BRA;
7578  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0,  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
7579    &firstbyte, &reqbyte, NULL, cd, NULL);    &firstbyte, &reqbyte, NULL, cd, NULL);
7580  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
7581  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
7582  re->flags = cd->external_flags;  re->flags = cd->external_flags;
7583    
7584  if (cd->had_accept) reqbyte = -1;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqbyte = REQ_NONE;   /* Must disable after (*ACCEPT) */
7585    
7586  /* If not reached end of pattern on success, there's an excess bracket. */  /* If not reached end of pattern on success, there's an excess bracket. */
7587    
# Line 7211  if (code - codestart > length) errorcode Line 7601  if (code - codestart > length) errorcode
7601  while (errorcode == 0 && cd->hwm > cworkspace)  while (errorcode == 0 && cd->hwm > cworkspace)
7602    {    {
7603    int offset, recno;    int offset, recno;
7604    const uschar *groupptr;    const pcre_uchar *groupptr;
7605    cd->hwm -= LINK_SIZE;    cd->hwm -= LINK_SIZE;
7606    offset = GET(cd->hwm, 0);    offset = GET(cd->hwm, 0);
7607    recno = GET(codestart, offset);    recno = GET(codestart, offset);
7608    groupptr = _pcre_find_bracket(codestart, utf8, recno);    groupptr = _pcre_find_bracket(codestart, utf8, recno);
7609    if (groupptr == NULL) errorcode = ERR53;    if (groupptr == NULL) errorcode = ERR53;
7610      else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));      else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));
7611    }    }
7612    
7613  /* Give an error if there's back reference to a non-existent capturing  /* Give an error if there's back reference to a non-existent capturing
# Line 7235  length, and set their lengths. */ Line 7625  length, and set their lengths. */
7625    
7626  if (cd->check_lookbehind)  if (cd->check_lookbehind)
7627    {    {
7628    uschar *cc = (uschar *)codestart;    pcre_uchar *cc = (pcre_uchar *)codestart;
7629    
7630    /* Loop, searching for OP_REVERSE items, and process those that do not have    /* Loop, searching for OP_REVERSE items, and process those that do not have
7631    their length set. (Actually, it will also re-process any that have a length    their length set. (Actually, it will also re-process any that have a length
7632    of zero, but that is a pathological case, and it does no harm.) When we find    of zero, but that is a pathological case, and it does no harm.) When we find
7633    one, we temporarily terminate the branch it is in while we scan it. */    one, we temporarily terminate the branch it is in while we scan it. */
7634    
7635    for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);    for (cc = (pcre_uchar *)_pcre_find_bracket(codestart, utf8, -1);
7636         cc != NULL;         cc != NULL;
7637         cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))         cc = (pcre_uchar *)_pcre_find_bracket(cc, utf8, -1))
7638      {      {
7639      if (GET(cc, 1) == 0)      if (GET(cc, 1) == 0)
7640        {        {
7641        int fixed_length;        int fixed_length;
7642        uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);        pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
7643        int end_op = *be;        int end_op = *be;
7644        *be = OP_END;        *be = OP_END;
7645        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
# Line 7258  if (cd->check_lookbehind) Line 7648  if (cd->check_lookbehind)
7648        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
7649        if (fixed_length < 0)        if (fixed_length < 0)
7650          {          {
7651          errorcode = (fixed_length == -2)? ERR36 : ERR25;          errorcode = (fixed_length == -2)? ERR36 :
7652                        (fixed_length == -4)? ERR70 : ERR25;
7653          break;          break;
7654          }          }
7655        PUT(cc, 1, fixed_length);        PUT(cc, 1, fixed_length);
# Line 7273  if (errorcode != 0) Line 7664  if (errorcode != 0)
7664    {    {
7665    (pcre_free)(re);    (pcre_free)(re);
7666    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
7667    *erroroffset = (int)(ptr - (const uschar *)pattern);    *erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
7668    PCRE_EARLY_ERROR_RETURN2:    PCRE_EARLY_ERROR_RETURN2:
7669    *errorptr = find_error_text(errorcode);    *errorptr = find_error_text(errorcode);
7670    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
# Line 7359  if (code - codestart > length) Line 7750  if (code - codestart > length)
7750    {    {
7751    (pcre_free)(re);    (pcre_free)(re);
7752    *errorptr = find_error_text(ERR23);    *errorptr = find_error_text(ERR23);
7753    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (pcre_uchar *)pattern;
7754    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
7755    return NULL;    return NULL;
7756    }    }

Legend:
Removed from v.613  
changed lines
  Added in v.763

  ViewVC Help
Powered by ViewVC 1.1.5