/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

code/trunk/pcre_compile.c revision 654 by ph10, Tue Aug 2 11:00:40 2011 UTC code/branches/pcre16/pcre_compile.c revision 759 by zherczeg, Mon Nov 21 18:57:34 2011 UTC
# Line 231  static const char posix_names[] = Line 231  static const char posix_names[] =
231    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
232    STRING_word0  STRING_xdigit;    STRING_word0  STRING_xdigit;
233    
234  static const uschar posix_name_lengths[] = {  static const pcre_uint8 posix_name_lengths[] = {
235    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
236    
237  /* Table of class bit maps for each POSIX class. Each class is formed from a  /* Table of class bit maps for each POSIX class. Each class is formed from a
# Line 266  substitutes must be in the order of the Line 266  substitutes must be in the order of the
266  both positive and negative cases. NULL means no substitute. */  both positive and negative cases. NULL means no substitute. */
267    
268  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
269  static const uschar *substitutes[] = {  static const pcre_uchar string_PNd[]  = {
270    (uschar *)"\\P{Nd}",    /* \D */    CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
271    (uschar *)"\\p{Nd}",    /* \d */    CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
272    (uschar *)"\\P{Xsp}",   /* \S */       /* NOTE: Xsp is Perl space */  static const pcre_uchar string_pNd[]  = {
273    (uschar *)"\\p{Xsp}",   /* \s */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
274    (uschar *)"\\P{Xwd}",   /* \W */    CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
275    (uschar *)"\\p{Xwd}"    /* \w */  static const pcre_uchar string_PXsp[] = {
276      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
277      CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
278    static const pcre_uchar string_pXsp[] = {
279      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
280      CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
281    static const pcre_uchar string_PXwd[] = {
282      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
283      CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
284    static const pcre_uchar string_pXwd[] = {
285      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
286      CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
287    
288    static const pcre_uchar *substitutes[] = {
289      string_PNd,           /* \D */
290      string_pNd,           /* \d */
291      string_PXsp,          /* \S */       /* NOTE: Xsp is Perl space */
292      string_pXsp,          /* \s */
293      string_PXwd,          /* \W */
294      string_pXwd           /* \w */
295  };  };
296    
297  static const uschar *posix_substitutes[] = {  static const pcre_uchar string_pL[] =   {
298    (uschar *)"\\p{L}",     /* alpha */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
299    (uschar *)"\\p{Ll}",    /* lower */    CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
300    (uschar *)"\\p{Lu}",    /* upper */  static const pcre_uchar string_pLl[] =  {
301    (uschar *)"\\p{Xan}",   /* alnum */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
302    NULL,                   /* ascii */    CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
303    (uschar *)"\\h",        /* blank */  static const pcre_uchar string_pLu[] =  {
304    NULL,                   /* cntrl */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
305    (uschar *)"\\p{Nd}",    /* digit */    CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
306    NULL,                   /* graph */  static const pcre_uchar string_pXan[] = {
307    NULL,                   /* print */    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
308    NULL,                   /* punct */    CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
309    (uschar *)"\\p{Xps}",   /* space */    /* NOTE: Xps is POSIX space */  static const pcre_uchar string_h[] =    {
310    (uschar *)"\\p{Xwd}",   /* word */    CHAR_BACKSLASH, CHAR_h, '\0' };
311    NULL,                   /* xdigit */  static const pcre_uchar string_pXps[] = {
312      CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
313      CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
314    static const pcre_uchar string_PL[] =   {
315      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
316      CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
317    static const pcre_uchar string_PLl[] =  {
318      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
319      CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
320    static const pcre_uchar string_PLu[] =  {
321      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
322      CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
323    static const pcre_uchar string_PXan[] = {
324      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
325      CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
326    static const pcre_uchar string_H[] =    {
327      CHAR_BACKSLASH, CHAR_H, '\0' };
328    static const pcre_uchar string_PXps[] = {
329      CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
330      CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
331    
332    static const pcre_uchar *posix_substitutes[] = {
333      string_pL,            /* alpha */
334      string_pLl,           /* lower */
335      string_pLu,           /* upper */
336      string_pXan,          /* alnum */
337      NULL,                 /* ascii */
338      string_h,             /* blank */
339      NULL,                 /* cntrl */
340      string_pNd,           /* digit */
341      NULL,                 /* graph */
342      NULL,                 /* print */
343      NULL,                 /* punct */
344      string_pXps,          /* space */    /* NOTE: Xps is POSIX space */
345      string_pXwd,          /* word */
346      NULL,                 /* xdigit */
347    /* Negated cases */    /* Negated cases */
348    (uschar *)"\\P{L}",     /* ^alpha */    string_PL,            /* ^alpha */
349    (uschar *)"\\P{Ll}",    /* ^lower */    string_PLl,           /* ^lower */
350    (uschar *)"\\P{Lu}",    /* ^upper */    string_PLu,           /* ^upper */
351    (uschar *)"\\P{Xan}",   /* ^alnum */    string_PXan,          /* ^alnum */
352    NULL,                   /* ^ascii */    NULL,                 /* ^ascii */
353    (uschar *)"\\H",        /* ^blank */    string_H,             /* ^blank */
354    NULL,                   /* ^cntrl */    NULL,                 /* ^cntrl */
355    (uschar *)"\\P{Nd}",    /* ^digit */    string_PNd,           /* ^digit */
356    NULL,                   /* ^graph */    NULL,                 /* ^graph */
357    NULL,                   /* ^print */    NULL,                 /* ^print */
358    NULL,                   /* ^punct */    NULL,                 /* ^punct */
359    (uschar *)"\\P{Xps}",   /* ^space */   /* NOTE: Xps is POSIX space */    string_PXps,          /* ^space */   /* NOTE: Xps is POSIX space */
360    (uschar *)"\\P{Xwd}",   /* ^word */    string_PXwd,          /* ^word */
361    NULL                    /* ^xdigit */    NULL                  /* ^xdigit */
362  };  };
363  #define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))  #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
364  #endif  #endif
365    
366  #define STRING(a)  # a  #define STRING(a)  # a
# Line 410  static const char error_texts[] = Line 464  static const char error_texts[] =
464    "this version of PCRE is not compiled with PCRE_UCP support\0"    "this version of PCRE is not compiled with PCRE_UCP support\0"
465    "\\c must be followed by an ASCII character\0"    "\\c must be followed by an ASCII character\0"
466    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
467      /* 70 */
468      "internal error: unknown opcode in find_fixedlength()\0"
469    ;    ;
470    
471  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 546  static const unsigned char ebcdic_charta Line 602  static const unsigned char ebcdic_charta
602  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
603    
604  static BOOL  static BOOL
605    compile_regex(int, uschar **, const uschar **, int *, BOOL, BOOL, int, int,    compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
606      int *, int *, branch_chain *, compile_data *, int *);      int *, int *, branch_chain *, compile_data *, int *);
607    
608    
# Line 593  Returns:    TRUE or FALSE Line 649  Returns:    TRUE or FALSE
649  */  */
650    
651  static BOOL  static BOOL
652  is_counted_repeat(const uschar *p)  is_counted_repeat(const pcre_uchar *p)
653  {  {
654  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
655  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
# Line 635  Returns:         zero or positive => a d Line 691  Returns:         zero or positive => a d
691  */  */
692    
693  static int  static int
694  check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,  check_escape(const pcre_uchar **ptrptr, int *errorcodeptr, int bracount,
695    int options, BOOL isclass)    int options, BOOL isclass)
696  {  {
697  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
698  const uschar *ptr = *ptrptr + 1;  const pcre_uchar *ptr = *ptrptr + 1;
699  int c, i;  int c, i;
700    
701  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */
# Line 666  else if ((i = escapes[c - 0x48]) != 0) Line 722  else if ((i = escapes[c - 0x48]) != 0)
722    
723  else  else
724    {    {
725    const uschar *oldptr;    const pcre_uchar *oldptr;
726    BOOL braced, negated;    BOOL braced, negated;
727    
728    switch (c)    switch (c)
# Line 676  else Line 732  else
732    
733      case CHAR_l:      case CHAR_l:
734      case CHAR_L:      case CHAR_L:
735        *errorcodeptr = ERR37;
736        break;
737    
738      case CHAR_u:      case CHAR_u:
739        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
740          {
741          /* In JavaScript, \u must be followed by exactly four hexadecimal
742          digits. Otherwise it is treated as a literal lowercase letter u. */
743          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
744               && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
745            {
746            c = 0;
747            for (i = 0; i < 4; ++i)
748              {
749              register int cc = *(++ptr);
750    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
751              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
752              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
753    #else           /* EBCDIC coding */
754              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
755              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
756    #endif
757              }
758            }
759          }
760        else
761          *errorcodeptr = ERR37;
762        break;
763    
764      case CHAR_U:      case CHAR_U:
765      *errorcodeptr = ERR37;      /* In JavaScript, \U is an uppercase U letter. */
766        if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
767      break;      break;
768    
769      /* In a character class, \g is just a literal "g". Outside a character      /* In a character class, \g is just a literal "g". Outside a character
# Line 710  else Line 795  else
795    
796      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
797        {        {
798        const uschar *p;        const pcre_uchar *p;
799        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
800          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
801        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
# Line 828  else Line 913  else
913      treated as a data character. */      treated as a data character. */
914    
915      case CHAR_x:      case CHAR_x:
916        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
917          {
918          /* In JavaScript, \x must be followed by exactly two hexadecimal
919          digits. Otherwise it is treated as a literal lowercase letter x. */
920          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
921            {
922            c = 0;
923            for (i = 0; i < 2; ++i)
924              {
925              register int cc = *(++ptr);
926    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
927              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
928              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
929    #else           /* EBCDIC coding */
930              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
931              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
932    #endif
933              }
934            }
935          break;
936          }
937    
938      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
939        {        {
940        const uschar *pt = ptr + 2;        const pcre_uchar *pt = ptr + 2;
941        int count = 0;        int count = 0;
942    
943        c = 0;        c = 0;
# Line 961  Returns:         type value from ucp_typ Line 1068  Returns:         type value from ucp_typ
1068  */  */
1069    
1070  static int  static int
1071  get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
1072  {  {
1073  int c, i, bot, top;  int c, i, bot, top;
1074  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
1075  char name[32];  char name[32];
1076    
1077  c = *(++ptr);  c = *(++ptr);
# Line 1053  Returns:         pointer to '}' on succe Line 1160  Returns:         pointer to '}' on succe
1160                   current ptr on error, with errorcodeptr set non-zero                   current ptr on error, with errorcodeptr set non-zero
1161  */  */
1162    
1163  static const uschar *  static const pcre_uchar *
1164  read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr)  read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr)
1165  {  {
1166  int min = 0;  int min = 0;
1167  int max = -1;  int max = -1;
# Line 1139  Returns:       the number of the named s Line 1246  Returns:       the number of the named s
1246  */  */
1247    
1248  static int  static int
1249  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn,
1250    BOOL xmode, BOOL utf8, int *count)    BOOL xmode, BOOL utf8, int *count)
1251  {  {
1252  uschar *ptr = *ptrptr;  pcre_uchar *ptr = *ptrptr;
1253  int start_count = *count;  int start_count = *count;
1254  int hwm_count = start_count;  int hwm_count = start_count;
1255  BOOL dup_parens = FALSE;  BOOL dup_parens = FALSE;
# Line 1209  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1316  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1316          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1317        {        {
1318        int term;        int term;
1319        const uschar *thisname;        const pcre_uchar *thisname;
1320        *count += 1;        *count += 1;
1321        if (name == NULL && *count == lorn) return *count;        if (name == NULL && *count == lorn) return *count;
1322        term = *ptr++;        term = *ptr++;
# Line 1372  Returns:       the number of the found s Line 1479  Returns:       the number of the found s
1479  */  */
1480    
1481  static int  static int
1482  find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode,  find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode,
1483    BOOL utf8)    BOOL utf8)
1484  {  {
1485  uschar *ptr = (uschar *)cd->start_pattern;  pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern;
1486  int count = 0;  int count = 0;
1487  int rc;  int rc;
1488    
# Line 1413  Arguments: Line 1520  Arguments:
1520  Returns:       pointer to the first significant opcode  Returns:       pointer to the first significant opcode
1521  */  */
1522    
1523  static const uschar*  static const pcre_uchar*
1524  first_significant_code(const uschar *code, BOOL skipassert)  first_significant_code(const pcre_uchar *code, BOOL skipassert)
1525  {  {
1526  for (;;)  for (;;)
1527    {    {
# Line 1475  Arguments: Line 1582  Arguments:
1582    
1583  Returns:   the fixed length,  Returns:   the fixed length,
1584               or -1 if there is no fixed length,               or -1 if there is no fixed length,
1585               or -2 if \C was encountered               or -2 if \C was encountered (in UTF-8 mode only)
1586               or -3 if an OP_RECURSE item was encountered and atend is FALSE               or -3 if an OP_RECURSE item was encountered and atend is FALSE
1587                 or -4 if an unknown opcode was encountered (internal error)
1588  */  */
1589    
1590  static int  static int
1591  find_fixedlength(uschar *code, BOOL utf8, BOOL atend, compile_data *cd)  find_fixedlength(pcre_uchar *code, BOOL utf8, BOOL atend, compile_data *cd)
1592  {  {
1593  int length = -1;  int length = -1;
1594    
1595  register int branchlength = 0;  register int branchlength = 0;
1596  register uschar *cc = code + 1 + LINK_SIZE;  register pcre_uchar *cc = code + 1 + LINK_SIZE;
1597    
1598  /* Scan along the opcodes for this branch. If we get to the end of the  /* Scan along the opcodes for this branch. If we get to the end of the
1599  branch, check the length against that of the other branches. */  branch, check the length against that of the other branches. */
# Line 1493  branch, check the length against that of Line 1601  branch, check the length against that of
1601  for (;;)  for (;;)
1602    {    {
1603    int d;    int d;
1604    uschar *ce, *cs;    pcre_uchar *ce, *cs;
1605    register int op = *cc;    register int op = *cc;
1606    switch (op)    switch (op)
1607      {      {
1608      /* We only need to continue for OP_CBRA (normal capturing bracket) and      /* We only need to continue for OP_CBRA (normal capturing bracket) and
1609      OP_BRA (normal non-capturing bracket) because the other variants of these      OP_BRA (normal non-capturing bracket) because the other variants of these
1610      opcodes are all concerned with unlimited repeated groups, which of course      opcodes are all concerned with unlimited repeated groups, which of course
1611      are not of fixed length. They will cause a -1 response from the default      are not of fixed length. */
     case of this switch. */  
1612    
1613      case OP_CBRA:      case OP_CBRA:
1614      case OP_BRA:      case OP_BRA:
1615      case OP_ONCE:      case OP_ONCE:
1616        case OP_ONCE_NC:
1617      case OP_COND:      case OP_COND:
1618      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);
1619      if (d < 0) return d;      if (d < 0) return d;
# Line 1514  for (;;) Line 1622  for (;;)
1622      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1623      break;      break;
1624    
1625      /* Reached end of a branch; if it's a ket it is the end of a nested      /* Reached end of a branch; if it's a ket it is the end of a nested call.
1626      call. If it's ALT it is an alternation in a nested call. If it is      If it's ALT it is an alternation in a nested call. An ACCEPT is effectively
1627      END it's the end of the outer call. All can be handled by the same code.      an ALT. If it is END it's the end of the outer call. All can be handled by
1628      Note that we must not include the OP_KETRxxx opcodes here, because they      the same code. Note that we must not include the OP_KETRxxx opcodes here,
1629      all imply an unlimited repeat. */      because they all imply an unlimited repeat. */
1630    
1631      case OP_ALT:      case OP_ALT:
1632      case OP_KET:      case OP_KET:
1633      case OP_END:      case OP_END:
1634        case OP_ACCEPT:
1635        case OP_ASSERT_ACCEPT:
1636      if (length < 0) length = branchlength;      if (length < 0) length = branchlength;
1637        else if (length != branchlength) return -1;        else if (length != branchlength) return -1;
1638      if (*cc != OP_ALT) return length;      if (*cc != OP_ALT) return length;
# Line 1536  for (;;) Line 1646  for (;;)
1646    
1647      case OP_RECURSE:      case OP_RECURSE:
1648      if (!atend) return -3;      if (!atend) return -3;
1649      cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1650      do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
1651      if (cc > cs && cc < ce) return -1;                /* Recursion */      if (cc > cs && cc < ce) return -1;                    /* Recursion */
1652      d = find_fixedlength(cs + 2, utf8, atend, cd);      d = find_fixedlength(cs + 2, utf8, atend, cd);
1653      if (d < 0) return d;      if (d < 0) return d;
1654      branchlength += d;      branchlength += d;
# Line 1556  for (;;) Line 1666  for (;;)
1666    
1667      /* Skip over things that don't match chars */      /* Skip over things that don't match chars */
1668    
1669      case OP_REVERSE:      case OP_MARK:
1670      case OP_CREF:      case OP_PRUNE_ARG:
1671      case OP_NCREF:      case OP_SKIP_ARG:
1672      case OP_RREF:      case OP_THEN_ARG:
1673      case OP_NRREF:      cc += cc[1] + _pcre_OP_lengths[*cc];
1674      case OP_DEF:      break;
1675    
1676      case OP_CALLOUT:      case OP_CALLOUT:
     case OP_SOD:  
     case OP_SOM:  
     case OP_SET_SOM:  
     case OP_EOD:  
     case OP_EODN:  
1677      case OP_CIRC:      case OP_CIRC:
1678      case OP_CIRCM:      case OP_CIRCM:
1679        case OP_CLOSE:
1680        case OP_COMMIT:
1681        case OP_CREF:
1682        case OP_DEF:
1683      case OP_DOLL:      case OP_DOLL:
1684      case OP_DOLLM:      case OP_DOLLM:
1685        case OP_EOD:
1686        case OP_EODN:
1687        case OP_FAIL:
1688        case OP_NCREF:
1689        case OP_NRREF:
1690      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
1691        case OP_PRUNE:
1692        case OP_REVERSE:
1693        case OP_RREF:
1694        case OP_SET_SOM:
1695        case OP_SKIP:
1696        case OP_SOD:
1697        case OP_SOM:
1698        case OP_THEN:
1699      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
1700      cc += _pcre_OP_lengths[*cc];      cc += _pcre_OP_lengths[*cc];
1701      break;      break;
# Line 1594  for (;;) Line 1717  for (;;)
1717      need to skip over a multibyte character in UTF8 mode.  */      need to skip over a multibyte character in UTF8 mode.  */
1718    
1719      case OP_EXACT:      case OP_EXACT:
1720        case OP_EXACTI:
1721        case OP_NOTEXACT:
1722        case OP_NOTEXACTI:
1723      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1724      cc += 4;      cc += 4;
1725  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1614  for (;;) Line 1740  for (;;)
1740      cc += 2;      cc += 2;
1741      /* Fall through */      /* Fall through */
1742    
1743        case OP_HSPACE:
1744        case OP_VSPACE:
1745        case OP_NOT_HSPACE:
1746        case OP_NOT_VSPACE:
1747      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1748      case OP_DIGIT:      case OP_DIGIT:
1749      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
# Line 1626  for (;;) Line 1756  for (;;)
1756      cc++;      cc++;
1757      break;      break;
1758    
1759      /* The single-byte matcher isn't allowed */      /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
1760        otherwise \C is coded as OP_ALLANY. */
1761    
1762      case OP_ANYBYTE:      case OP_ANYBYTE:
1763      return -2;      return -2;
# Line 1645  for (;;) Line 1776  for (;;)
1776    
1777      switch (*cc)      switch (*cc)
1778        {        {
1779          case OP_CRPLUS:
1780          case OP_CRMINPLUS:
1781        case OP_CRSTAR:        case OP_CRSTAR:
1782        case OP_CRMINSTAR:        case OP_CRMINSTAR:
1783        case OP_CRQUERY:        case OP_CRQUERY:
# Line 1665  for (;;) Line 1798  for (;;)
1798    
1799      /* Anything else is variable length */      /* Anything else is variable length */
1800    
1801      default:      case OP_ANYNL:
1802        case OP_BRAMINZERO:
1803        case OP_BRAPOS:
1804        case OP_BRAPOSZERO:
1805        case OP_BRAZERO:
1806        case OP_CBRAPOS:
1807        case OP_EXTUNI:
1808        case OP_KETRMAX:
1809        case OP_KETRMIN:
1810        case OP_KETRPOS:
1811        case OP_MINPLUS:
1812        case OP_MINPLUSI:
1813        case OP_MINQUERY:
1814        case OP_MINQUERYI:
1815        case OP_MINSTAR:
1816        case OP_MINSTARI:
1817        case OP_MINUPTO:
1818        case OP_MINUPTOI:
1819        case OP_NOTMINPLUS:
1820        case OP_NOTMINPLUSI:
1821        case OP_NOTMINQUERY:
1822        case OP_NOTMINQUERYI:
1823        case OP_NOTMINSTAR:
1824        case OP_NOTMINSTARI:
1825        case OP_NOTMINUPTO:
1826        case OP_NOTMINUPTOI:
1827        case OP_NOTPLUS:
1828        case OP_NOTPLUSI:
1829        case OP_NOTPOSPLUS:
1830        case OP_NOTPOSPLUSI:
1831        case OP_NOTPOSQUERY:
1832        case OP_NOTPOSQUERYI:
1833        case OP_NOTPOSSTAR:
1834        case OP_NOTPOSSTARI:
1835        case OP_NOTPOSUPTO:
1836        case OP_NOTPOSUPTOI:
1837        case OP_NOTQUERY:
1838        case OP_NOTQUERYI:
1839        case OP_NOTSTAR:
1840        case OP_NOTSTARI:
1841        case OP_NOTUPTO:
1842        case OP_NOTUPTOI:
1843        case OP_PLUS:
1844        case OP_PLUSI:
1845        case OP_POSPLUS:
1846        case OP_POSPLUSI:
1847        case OP_POSQUERY:
1848        case OP_POSQUERYI:
1849        case OP_POSSTAR:
1850        case OP_POSSTARI:
1851        case OP_POSUPTO:
1852        case OP_POSUPTOI:
1853        case OP_QUERY:
1854        case OP_QUERYI:
1855        case OP_REF:
1856        case OP_REFI:
1857        case OP_SBRA:
1858        case OP_SBRAPOS:
1859        case OP_SCBRA:
1860        case OP_SCBRAPOS:
1861        case OP_SCOND:
1862        case OP_SKIPZERO:
1863        case OP_STAR:
1864        case OP_STARI:
1865        case OP_TYPEMINPLUS:
1866        case OP_TYPEMINQUERY:
1867        case OP_TYPEMINSTAR:
1868        case OP_TYPEMINUPTO:
1869        case OP_TYPEPLUS:
1870        case OP_TYPEPOSPLUS:
1871        case OP_TYPEPOSQUERY:
1872        case OP_TYPEPOSSTAR:
1873        case OP_TYPEPOSUPTO:
1874        case OP_TYPEQUERY:
1875        case OP_TYPESTAR:
1876        case OP_TYPEUPTO:
1877        case OP_UPTO:
1878        case OP_UPTOI:
1879      return -1;      return -1;
1880    
1881        /* Catch unrecognized opcodes so that when new ones are added they
1882        are not forgotten, as has happened in the past. */
1883    
1884        default:
1885        return -4;
1886      }      }
1887    }    }
1888  /* Control never gets here */  /* Control never gets here */
# Line 1693  Arguments: Line 1909  Arguments:
1909  Returns:      pointer to the opcode for the bracket, or NULL if not found  Returns:      pointer to the opcode for the bracket, or NULL if not found
1910  */  */
1911    
1912  const uschar *  const pcre_uchar *
1913  _pcre_find_bracket(const uschar *code, BOOL utf8, int number)  _pcre_find_bracket(const pcre_uchar *code, BOOL utf8, int number)
1914  {  {
1915  for (;;)  for (;;)
1916    {    {
# Line 1712  for (;;) Line 1928  for (;;)
1928    
1929    else if (c == OP_REVERSE)    else if (c == OP_REVERSE)
1930      {      {
1931      if (number < 0) return (uschar *)code;      if (number < 0) return (pcre_uchar *)code;
1932      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1933      }      }
1934    
# Line 1722  for (;;) Line 1938  for (;;)
1938             c == OP_CBRAPOS || c == OP_SCBRAPOS)             c == OP_CBRAPOS || c == OP_SCBRAPOS)
1939      {      {
1940      int n = GET2(code, 1+LINK_SIZE);      int n = GET2(code, 1+LINK_SIZE);
1941      if (n == number) return (uschar *)code;      if (n == number) return (pcre_uchar *)code;
1942      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1943      }      }
1944    
# Line 1761  for (;;) Line 1977  for (;;)
1977        break;        break;
1978    
1979        case OP_THEN_ARG:        case OP_THEN_ARG:
1980        code += code[1+LINK_SIZE];        code += code[1];
1981        break;        break;
1982        }        }
1983    
# Line 1830  Arguments: Line 2046  Arguments:
2046  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found
2047  */  */
2048    
2049  static const uschar *  static const pcre_uchar *
2050  find_recurse(const uschar *code, BOOL utf8)  find_recurse(const pcre_uchar *code, BOOL utf8)
2051  {  {
2052  for (;;)  for (;;)
2053    {    {
# Line 1880  for (;;) Line 2096  for (;;)
2096        break;        break;
2097    
2098        case OP_THEN_ARG:        case OP_THEN_ARG:
2099        code += code[1+LINK_SIZE];        code += code[1];
2100        break;        break;
2101        }        }
2102    
# Line 1957  Returns:      TRUE if what is matched co Line 2173  Returns:      TRUE if what is matched co
2173  */  */
2174    
2175  static BOOL  static BOOL
2176  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2177    compile_data *cd)    BOOL utf8, compile_data *cd)
2178  {  {
2179  register int c;  register int c;
2180  for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);  for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
2181       code < endcode;       code < endcode;
2182       code = first_significant_code(code + _pcre_OP_lengths[c], TRUE))       code = first_significant_code(code + _pcre_OP_lengths[c], TRUE))
2183    {    {
2184    const uschar *ccode;    const pcre_uchar *ccode;
2185    
2186    c = *code;    c = *code;
2187    
# Line 1988  for (code = first_significant_code(code Line 2204  for (code = first_significant_code(code
2204    
2205    if (c == OP_RECURSE)    if (c == OP_RECURSE)
2206      {      {
2207      const uschar *scode;      const pcre_uchar *scode;
2208      BOOL empty_branch;      BOOL empty_branch;
2209    
2210      /* Test for forward reference */      /* Test for forward reference */
# Line 2045  for (code = first_significant_code(code Line 2261  for (code = first_significant_code(code
2261    
2262    if (c == OP_BRA  || c == OP_BRAPOS ||    if (c == OP_BRA  || c == OP_BRAPOS ||
2263        c == OP_CBRA || c == OP_CBRAPOS ||        c == OP_CBRA || c == OP_CBRAPOS ||
2264        c == OP_ONCE || c == OP_COND)        c == OP_ONCE || c == OP_ONCE_NC ||
2265          c == OP_COND)
2266      {      {
2267      BOOL empty_branch;      BOOL empty_branch;
2268      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
# Line 2217  for (code = first_significant_code(code Line 2434  for (code = first_significant_code(code
2434      break;      break;
2435    
2436      case OP_THEN_ARG:      case OP_THEN_ARG:
2437      code += code[1+LINK_SIZE];      code += code[1];
2438      break;      break;
2439    
2440      /* None of the remaining opcodes are required to match a character. */      /* None of the remaining opcodes are required to match a character. */
# Line 2254  Returns:      TRUE if what is matched co Line 2471  Returns:      TRUE if what is matched co
2471  */  */
2472    
2473  static BOOL  static BOOL
2474  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
2475    BOOL utf8, compile_data *cd)    branch_chain *bcptr, BOOL utf8, compile_data *cd)
2476  {  {
2477  while (bcptr != NULL && bcptr->current_branch >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2478    {    {
# Line 2295  I think. Line 2512  I think.
2512  A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.  A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
2513  It seems that the appearance of a nested POSIX class supersedes an apparent  It seems that the appearance of a nested POSIX class supersedes an apparent
2514  external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or  external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or
2515  a digit. Also, unescaped square brackets may also appear as part of class  a digit.
2516  names. For example, [:a[:abc]b:] gives unknown class "[:abc]b:]"in Perl.  
2517    In Perl, unescaped square brackets may also appear as part of class names. For
2518    example, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for
2519    [:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not
2520    seem right at all. PCRE does not allow closing square brackets in POSIX class
2521    names.
2522    
2523  Arguments:  Arguments:
2524    ptr      pointer to the initial [    ptr      pointer to the initial [
# Line 2306  Returns:   TRUE or FALSE Line 2528  Returns:   TRUE or FALSE
2528  */  */
2529    
2530  static BOOL  static BOOL
2531  check_posix_syntax(const uschar *ptr, const uschar **endptr)  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
2532  {  {
2533  int terminator;          /* Don't combine these lines; the Solaris cc */  int terminator;          /* Don't combine these lines; the Solaris cc */
2534  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
# Line 2314  for (++ptr; *ptr != 0; ptr++) Line 2536  for (++ptr; *ptr != 0; ptr++)
2536    {    {
2537    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2538      ptr++;      ptr++;
2539      else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
2540    else    else
2541      {      {
2542      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
# Line 2349  Returns:     a value representing the na Line 2572  Returns:     a value representing the na
2572  */  */
2573    
2574  static int  static int
2575  check_posix_name(const uschar *ptr, int len)  check_posix_name(const pcre_uchar *ptr, int len)
2576  {  {
2577  const char *pn = posix_names;  const char *pn = posix_names;
2578  register int yield = 0;  register int yield = 0;
# Line 2396  Returns:     nothing Line 2619  Returns:     nothing
2619  */  */
2620    
2621  static void  static void
2622  adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf8, compile_data *cd,
2623    uschar *save_hwm)    pcre_uchar *save_hwm)
2624  {  {
2625  uschar *ptr = group;  pcre_uchar *ptr = group;
2626    
2627  while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf8)) != NULL)
2628    {    {
2629    int offset;    int offset;
2630    uschar *hc;    pcre_uchar *hc;
2631    
2632    /* See if this recursion is on the forward reference list. If so, adjust the    /* See if this recursion is on the forward reference list. If so, adjust the
2633    reference. */    reference. */
# Line 2449  Arguments: Line 2672  Arguments:
2672  Returns:         new code pointer  Returns:         new code pointer
2673  */  */
2674    
2675  static uschar *  static pcre_uchar *
2676  auto_callout(uschar *code, const uschar *ptr, compile_data *cd)  auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd)
2677  {  {
2678  *code++ = OP_CALLOUT;  *code++ = OP_CALLOUT;
2679  *code++ = 255;  *code++ = 255;
2680  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */
2681  PUT(code, LINK_SIZE, 0);                       /* Default length */  PUT(code, LINK_SIZE, 0);                       /* Default length */
2682  return code + 2*LINK_SIZE;  return code + 2 * LINK_SIZE;
2683  }  }
2684    
2685    
# Line 2478  Returns:             nothing Line 2701  Returns:             nothing
2701  */  */
2702    
2703  static void  static void
2704  complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)  complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd)
2705  {  {
2706  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
2707  PUT(previous_callout, 2 + LINK_SIZE, length);  PUT(previous_callout, 2 + LINK_SIZE, length);
# Line 2616  Returns:        TRUE if possessifying is Line 2839  Returns:        TRUE if possessifying is
2839  */  */
2840    
2841  static BOOL  static BOOL
2842  check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr,  check_auto_possessive(const pcre_uchar *previous, BOOL utf8,
2843    int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
2844  {  {
2845  int c, next;  int c, next;
2846  int op_code = *previous++;  int op_code = *previous++;
# Line 3045  Returns:         TRUE on success Line 3268  Returns:         TRUE on success
3268  */  */
3269    
3270  static BOOL  static BOOL
3271  compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,  compile_branch(int *optionsptr, pcre_uchar **codeptr,
3272    int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    const pcre_uchar **ptrptr, int *errorcodeptr, int *firstbyteptr,
3273    int cond_depth, compile_data *cd, int *lengthptr)    int *reqbyteptr, branch_chain *bcptr, int cond_depth, compile_data *cd,
3274      int *lengthptr)
3275  {  {
3276  int repeat_type, op_type;  int repeat_type, op_type;
3277  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
# Line 3060  int options = *optionsptr; Line 3284  int options = *optionsptr;
3284  int after_manual_callout = 0;  int after_manual_callout = 0;
3285  int length_prevgroup = 0;  int length_prevgroup = 0;
3286  register int c;  register int c;
3287  register uschar *code = *codeptr;  register pcre_uchar *code = *codeptr;
3288  uschar *last_code = code;  pcre_uchar *last_code = code;
3289  uschar *orig_code = code;  pcre_uchar *orig_code = code;
3290  uschar *tempcode;  pcre_uchar *tempcode;
3291  BOOL inescq = FALSE;  BOOL inescq = FALSE;
3292  BOOL groupsetfirstbyte = FALSE;  BOOL groupsetfirstbyte = FALSE;
3293  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
3294  const uschar *tempptr;  const pcre_uchar *tempptr;
3295  const uschar *nestptr = NULL;  const pcre_uchar *nestptr = NULL;
3296  uschar *previous = NULL;  pcre_uchar *previous = NULL;
3297  uschar *previous_callout = NULL;  pcre_uchar *previous_callout = NULL;
3298  uschar *save_hwm = NULL;  pcre_uchar *save_hwm = NULL;
3299  uschar classbits[32];  pcre_uchar classbits[32];
3300    
3301  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
3302  must not do this for other options (e.g. PCRE_EXTENDED) because they may change  must not do this for other options (e.g. PCRE_EXTENDED) because they may change
# Line 3081  dynamically as we process the pattern. * Line 3305  dynamically as we process the pattern. *
3305  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3306  BOOL class_utf8;  BOOL class_utf8;
3307  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
3308  uschar *class_utf8data;  pcre_uint8 *class_utf8data;
3309  uschar *class_utf8data_base;  pcre_uint8 *class_utf8data_base;
3310  uschar utf8_char[6];  pcre_uint8 utf8_char[6];
3311  #else  #else
3312  BOOL utf8 = FALSE;  BOOL utf8 = FALSE;
 uschar *utf8_char = NULL;  
3313  #endif  #endif
3314    
3315  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 3137  for (;; ptr++) Line 3360  for (;; ptr++)
3360    int subfirstbyte;    int subfirstbyte;
3361    int terminator;    int terminator;
3362    int mclength;    int mclength;
3363    uschar mcbuffer[8];    int tempbracount;
3364      pcre_uchar mcbuffer[8];
3365    
3366    /* Get next byte in the pattern */    /* Get next byte in the pattern */
3367    
# Line 3184  for (;; ptr++) Line 3408  for (;; ptr++)
3408        }        }
3409    
3410      *lengthptr += (int)(code - last_code);      *lengthptr += (int)(code - last_code);
3411      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code - last_code),
3412          c));
3413    
3414      /* If "previous" is set and it is not at the start of the work space, move      /* If "previous" is set and it is not at the start of the work space, move
3415      it back to there, in order to avoid filling up the work space. Otherwise,      it back to there, in order to avoid filling up the work space. Otherwise,
# Line 3435  for (;; ptr++) Line 3660  for (;; ptr++)
3660      than 256), because in that case the compiled code doesn't use the bit map.      than 256), because in that case the compiled code doesn't use the bit map.
3661      */      */
3662    
3663      memset(classbits, 0, 32 * sizeof(uschar));      memset(classbits, 0, 32 * sizeof(pcre_uint8));
3664    
3665  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3666      class_utf8 = FALSE;                       /* No chars >= 256 */      class_utf8 = FALSE;                       /* No chars >= 256 */
# Line 3449  for (;; ptr++) Line 3674  for (;; ptr++)
3674    
3675      if (c != 0) do      if (c != 0) do
3676        {        {
3677        const uschar *oldptr;        const pcre_uchar *oldptr;
3678    
3679  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3680        if (utf8 && c > 127)        if (utf8 && c > 127)
# Line 3495  for (;; ptr++) Line 3720  for (;; ptr++)
3720          {          {
3721          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3722          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3723          register const uschar *cbits = cd->cbits;          register const pcre_uint8 *cbits = cd->cbits;
3724          uschar pbits[32];          pcre_uint8 pbits[32];
3725    
3726          if (ptr[1] != CHAR_COLON)          if (ptr[1] != CHAR_COLON)
3727            {            {
# Line 3551  for (;; ptr++) Line 3776  for (;; ptr++)
3776          /* Copy in the first table (always present) */          /* Copy in the first table (always present) */
3777    
3778          memcpy(pbits, cbits + posix_class_maps[posix_class],          memcpy(pbits, cbits + posix_class_maps[posix_class],
3779            32 * sizeof(uschar));            32 * sizeof(pcre_uint8));
3780    
3781          /* If there is a second table, add or remove it as required. */          /* If there is a second table, add or remove it as required. */
3782    
# Line 3613  for (;; ptr++) Line 3838  for (;; ptr++)
3838    
3839          if (c < 0)          if (c < 0)
3840            {            {
3841            register const uschar *cbits = cd->cbits;            register const pcre_uint8 *cbits = cd->cbits;
3842            class_charcount += 2;     /* Greater than 1 is what matters */            class_charcount += 2;     /* Greater than 1 is what matters */
3843    
3844            switch (-c)            switch (-c)
# Line 4256  for (;; ptr++) Line 4481  for (;; ptr++)
4481      past, but it no longer happens for non-repeated recursions. In fact, the      past, but it no longer happens for non-repeated recursions. In fact, the
4482      repeated ones could be re-implemented independently so as not to need this,      repeated ones could be re-implemented independently so as not to need this,
4483      but for the moment we rely on the code for repeating groups. */      but for the moment we rely on the code for repeating groups. */
4484    
4485      if (*previous == OP_RECURSE)      if (*previous == OP_RECURSE)
4486        {        {
4487        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
# Line 4298  for (;; ptr++) Line 4523  for (;; ptr++)
4523  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4524        if (utf8 && (code[-1] & 0x80) != 0)        if (utf8 && (code[-1] & 0x80) != 0)
4525          {          {
4526          uschar *lastchar = code - 1;          pcre_uchar *lastchar = code - 1;
4527          while((*lastchar & 0xc0) == 0x80) lastchar--;          while((*lastchar & 0xc0) == 0x80) lastchar--;
4528          c = code - lastchar;            /* Length of UTF-8 character */          c = code - lastchar;            /* Length of UTF-8 character */
4529          memcpy(utf8_char, lastchar, c); /* Save the char */          memcpy(utf8_char, lastchar, c); /* Save the char */
# Line 4360  for (;; ptr++) Line 4585  for (;; ptr++)
4585    
4586      else if (*previous < OP_EODN)      else if (*previous < OP_EODN)
4587        {        {
4588        uschar *oldcode;        pcre_uchar *oldcode;
4589        int prop_type, prop_value;        int prop_type, prop_value;
4590        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
4591        c = *previous;        c = *previous;
# Line 4582  for (;; ptr++) Line 4807  for (;; ptr++)
4807        {        {
4808        register int i;        register int i;
4809        int len = (int)(code - previous);        int len = (int)(code - previous);
4810        uschar *bralink = NULL;        pcre_uchar *bralink = NULL;
4811        uschar *brazeroptr = NULL;        pcre_uchar *brazeroptr = NULL;
4812    
4813        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
4814        we just ignore the repeat. */        we just ignore the repeat. */
# Line 4714  for (;; ptr++) Line 4939  for (;; ptr++)
4939              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
4940              for (i = 1; i < repeat_min; i++)              for (i = 1; i < repeat_min; i++)
4941                {                {
4942                uschar *hc;                pcre_uchar *hc;
4943                uschar *this_hwm = cd->hwm;                pcre_uchar *this_hwm = cd->hwm;
4944                memcpy(code, previous, len);                memcpy(code, previous, len);
4945                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
4946                  {                  {
# Line 4766  for (;; ptr++) Line 4991  for (;; ptr++)
4991    
4992          else for (i = repeat_max - 1; i >= 0; i--)          else for (i = repeat_max - 1; i >= 0; i--)
4993            {            {
4994            uschar *hc;            pcre_uchar *hc;
4995            uschar *this_hwm = cd->hwm;            pcre_uchar *this_hwm = cd->hwm;
4996    
4997            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
4998    
# Line 4800  for (;; ptr++) Line 5025  for (;; ptr++)
5025            {            {
5026            int oldlinkoffset;            int oldlinkoffset;
5027            int offset = (int)(code - bralink + 1);            int offset = (int)(code - bralink + 1);
5028            uschar *bra = code - offset;            pcre_uchar *bra = code - offset;
5029            oldlinkoffset = GET(bra, 1);            oldlinkoffset = GET(bra, 1);
5030            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
5031            *code++ = OP_KET;            *code++ = OP_KET;
# Line 4814  for (;; ptr++) Line 5039  for (;; ptr++)
5039        ONCE brackets can be converted into non-capturing brackets, as the        ONCE brackets can be converted into non-capturing brackets, as the
5040        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
5041        deal with possessive ONCEs specially.        deal with possessive ONCEs specially.
5042    
5043        Otherwise, if the quantifier was possessive, we convert the BRA code to        Otherwise, when we are doing the actual compile phase, check to see
5044        the POS form, and the KET code to KETRPOS. (It turns out to be convenient        whether this group is one that could match an empty string. If so,
5045        at runtime to detect this kind of subpattern at both the start and at the        convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
5046        end.) The use of special opcodes makes it possible to reduce greatly the        that runtime checking can be done. [This check is also applied to ONCE
5047        stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO,        groups at runtime, but in a different way.]
5048        convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that  
5049        the default action below, of wrapping everything inside atomic brackets,        Then, if the quantifier was possessive and the bracket is not a
5050        does not happen.        conditional, we convert the BRA code to the POS form, and the KET code to
5051          KETRPOS. (It turns out to be convenient at runtime to detect this kind of
5052        Then, when we are doing the actual compile phase, check to see whether        subpattern at both the start and at the end.) The use of special opcodes
5053        this group is one that could match an empty string. If so, convert the        makes it possible to reduce greatly the stack usage in pcre_exec(). If
5054        initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
5055        checking can be done. [This check is also applied to ONCE groups at  
5056        runtime, but in a different way.] */        Then, if the minimum number of matches is 1 or 0, cancel the possessive
5057          flag so that the default action below, of wrapping everything inside
5058          atomic brackets, does not happen. When the minimum is greater than 1,
5059          there will be earlier copies of the group, and so we still have to wrap
5060          the whole thing. */
5061    
5062        else        else
5063          {          {
5064          uschar *ketcode = code - 1 - LINK_SIZE;          pcre_uchar *ketcode = code - 1 - LINK_SIZE;
5065          uschar *bracode = ketcode - GET(ketcode, 1);          pcre_uchar *bracode = ketcode - GET(ketcode, 1);
5066    
5067          if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;          /* Convert possessive ONCE brackets to non-capturing */
5068          if (*bracode == OP_ONCE)  
5069            if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
5070                possessive_quantifier) *bracode = OP_BRA;
5071    
5072            /* For non-possessive ONCE brackets, all we need to do is to
5073            set the KET. */
5074    
5075            if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
5076            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
5077    
5078            /* Handle non-ONCE brackets and possessive ONCEs (which have been
5079            converted to non-capturing above). */
5080    
5081          else          else
5082            {            {
5083            if (possessive_quantifier)            /* In the compile phase, check for empty string matching. */
5084              {  
             *bracode += 1;                   /* Switch to xxxPOS opcodes */  
             *ketcode = OP_KETRPOS;  
             if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;  
             possessive_quantifier = FALSE;  
             }  
           else *ketcode = OP_KETRMAX + repeat_type;  
   
5085            if (lengthptr == NULL)            if (lengthptr == NULL)
5086              {              {
5087              uschar *scode = bracode;              pcre_uchar *scode = bracode;
5088              do              do
5089                {                {
5090                if (could_be_empty_branch(scode, ketcode, utf8, cd))                if (could_be_empty_branch(scode, ketcode, utf8, cd))
# Line 4863  for (;; ptr++) Line 5096  for (;; ptr++)
5096                }                }
5097              while (*scode == OP_ALT);              while (*scode == OP_ALT);
5098              }              }
5099    
5100              /* Handle possessive quantifiers. */
5101    
5102              if (possessive_quantifier)
5103                {
5104                /* For COND brackets, we wrap the whole thing in a possessively
5105                repeated non-capturing bracket, because we have not invented POS
5106                versions of the COND opcodes. Because we are moving code along, we
5107                must ensure that any pending recursive references are updated. */
5108    
5109                if (*bracode == OP_COND || *bracode == OP_SCOND)
5110                  {
5111                  int nlen = (int)(code - bracode);
5112                  *code = OP_END;
5113                  adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5114                  memmove(bracode + 1+LINK_SIZE, bracode, nlen);
5115                  code += 1 + LINK_SIZE;
5116                  nlen += 1 + LINK_SIZE;
5117                  *bracode = OP_BRAPOS;
5118                  *code++ = OP_KETRPOS;
5119                  PUTINC(code, 0, nlen);
5120                  PUT(bracode, 1, nlen);
5121                  }
5122    
5123                /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
5124    
5125                else
5126                  {
5127                  *bracode += 1;              /* Switch to xxxPOS opcodes */
5128                  *ketcode = OP_KETRPOS;
5129                  }
5130    
5131                /* If the minimum is zero, mark it as possessive, then unset the
5132                possessive flag when the minimum is 0 or 1. */
5133    
5134                if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
5135                if (repeat_min < 2) possessive_quantifier = FALSE;
5136                }
5137    
5138              /* Non-possessive quantifier */
5139    
5140              else *ketcode = OP_KETRMAX + repeat_type;
5141            }            }
5142          }          }
5143        }        }
# Line 4889  for (;; ptr++) Line 5164  for (;; ptr++)
5164      notation is just syntactic sugar, taken from Sun's Java package, but the      notation is just syntactic sugar, taken from Sun's Java package, but the
5165      special opcodes can optimize it.      special opcodes can optimize it.
5166    
5167      Possessively repeated subpatterns have already been handled in the code      Some (but not all) possessively repeated subpatterns have already been
5168      just above, so possessive_quantifier is always FALSE for them at this      completely handled in the code just above. For them, possessive_quantifier
5169      stage.      is always FALSE at this stage.
5170    
5171      Note that the repeated item starts at tempcode, not at previous, which      Note that the repeated item starts at tempcode, not at previous, which
5172      might be the first part of a string whose (former) last char we repeated.      might be the first part of a string whose (former) last char we repeated.
# Line 4992  for (;; ptr++) Line 5267  for (;; ptr++)
5267        int i, namelen;        int i, namelen;
5268        int arglen = 0;        int arglen = 0;
5269        const char *vn = verbnames;        const char *vn = verbnames;
5270        const uschar *name = ptr + 1;        const pcre_uchar *name = ptr + 1;
5271        const uschar *arg = NULL;        const pcre_uchar *arg = NULL;
5272        previous = NULL;        previous = NULL;
5273        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
5274        namelen = (int)(ptr - name);        namelen = (int)(ptr - name);
# Line 5040  for (;; ptr++) Line 5315  for (;; ptr++)
5315                PUT2INC(code, 0, oc->number);                PUT2INC(code, 0, oc->number);
5316                }                }
5317              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
5318    
5319                /* Do not set firstbyte after *ACCEPT */
5320                if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
5321              }              }
5322    
5323            /* Handle other cases with/without an argument */            /* Handle other cases with/without an argument */
# Line 5052  for (;; ptr++) Line 5330  for (;; ptr++)
5330                goto FAILED;                goto FAILED;
5331                }                }
5332              *code = verbs[i].op;              *code = verbs[i].op;
5333              if (*code++ == OP_THEN)              if (*code++ == OP_THEN) cd->external_flags |= PCRE_HASTHEN;
               {  
               PUT(code, 0, code - bcptr->current_branch - 1);  
               code += LINK_SIZE;  
               }  
5334              }              }
5335    
5336            else            else
# Line 5067  for (;; ptr++) Line 5341  for (;; ptr++)
5341                goto FAILED;                goto FAILED;
5342                }                }
5343              *code = verbs[i].op_arg;              *code = verbs[i].op_arg;
5344              if (*code++ == OP_THEN_ARG)              if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;
               {  
               PUT(code, 0, code - bcptr->current_branch - 1);  
               code += LINK_SIZE;  
               }  
5345              *code++ = arglen;              *code++ = arglen;
5346              memcpy(code, arg, arglen);              memcpy(code, arg, arglen);
5347              code += arglen;              code += arglen;
# Line 5096  for (;; ptr++) Line 5366  for (;; ptr++)
5366        {        {
5367        int i, set, unset, namelen;        int i, set, unset, namelen;
5368        int *optset;        int *optset;
5369        const uschar *name;        const pcre_uchar *name;
5370        uschar *slot;        pcre_uchar *slot;
5371    
5372        switch (*(++ptr))        switch (*(++ptr))
5373          {          {
# Line 5386  for (;; ptr++) Line 5656  for (;; ptr++)
5656    
5657          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5658          case CHAR_C:                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
5659          previous_callout = code;  /* Save for later completion */          previous_callout = code;     /* Save for later completion */
5660          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1;    /* Skip one item before completing */
5661          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
5662            {            {
5663            int n = 0;            int n = 0;
# Line 5523  for (;; ptr++) Line 5793  for (;; ptr++)
5793    
5794              if (!dupname)              if (!dupname)
5795                {                {
5796                uschar *cslot = cd->name_table;                pcre_uchar *cslot = cd->name_table;
5797                for (i = 0; i < cd->names_found; i++)                for (i = 0; i < cd->names_found; i++)
5798                  {                  {
5799                  if (cslot != slot)                  if (cslot != slot)
# Line 5579  for (;; ptr++) Line 5849  for (;; ptr++)
5849    
5850          if (lengthptr != NULL)          if (lengthptr != NULL)
5851            {            {
5852            const uschar *temp;            const pcre_uchar *temp;
5853    
5854            if (namelen == 0)            if (namelen == 0)
5855              {              {
# Line 5661  for (;; ptr++) Line 5931  for (;; ptr++)
5931          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
5932          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
5933            {            {
5934            const uschar *called;            const pcre_uchar *called;
5935            terminator = CHAR_RIGHT_PARENTHESIS;            terminator = CHAR_RIGHT_PARENTHESIS;
5936    
5937            /* Come here from the \g<...> and \g'...' code (Oniguruma            /* Come here from the \g<...> and \g'...' code (Oniguruma
# Line 5906  for (;; ptr++) Line 6176  for (;; ptr++)
6176      *code = bravalue;      *code = bravalue;
6177      tempcode = code;      tempcode = code;
6178      tempreqvary = cd->req_varyopt;        /* Save value before bracket */      tempreqvary = cd->req_varyopt;        /* Save value before bracket */
6179        tempbracount = cd->bracount;          /* Save value before bracket */
6180      length_prevgroup = 0;                 /* Initialize for pre-compile phase */      length_prevgroup = 0;                 /* Initialize for pre-compile phase */
6181    
6182      if (!compile_regex(      if (!compile_regex(
# Line 5928  for (;; ptr++) Line 6199  for (;; ptr++)
6199           ))           ))
6200        goto FAILED;        goto FAILED;
6201    
6202        /* If this was an atomic group and there are no capturing groups within it,
6203        generate OP_ONCE_NC instead of OP_ONCE. */
6204    
6205        if (bravalue == OP_ONCE && cd->bracount <= tempbracount)
6206          *code = OP_ONCE_NC;
6207    
6208      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
6209        cd->assert_depth -= 1;        cd->assert_depth -= 1;
6210    
6211      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
6212      group, while tempcode has been updated to point past the end of the group      group, while tempcode has been updated to point past the end of the group.
6213      and any option resetting that may follow it. The pattern pointer (ptr)      The pattern pointer (ptr) is on the bracket.
     is on the bracket. */  
6214    
6215      /* If this is a conditional bracket, check that there are no more than      If this is a conditional bracket, check that there are no more than
6216      two branches in the group, or just one if it's a DEFINE group. We do this      two branches in the group, or just one if it's a DEFINE group. We do this
6217      in the real compile phase, not in the pre-pass, where the whole group may      in the real compile phase, not in the pre-pass, where the whole group may
6218      not be available. */      not be available. */
6219    
6220      if (bravalue == OP_COND && lengthptr == NULL)      if (bravalue == OP_COND && lengthptr == NULL)
6221        {        {
6222        uschar *tc = code;        pcre_uchar *tc = code;
6223        int condcount = 0;        int condcount = 0;
6224    
6225        do {        do {
# Line 6118  for (;; ptr++) Line 6394  for (;; ptr++)
6394    
6395        if (-c == ESC_g)        if (-c == ESC_g)
6396          {          {
6397          const uschar *p;          const pcre_uchar *p;
6398          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
6399          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
6400            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
# Line 6258  for (;; ptr++) Line 6534  for (;; ptr++)
6534            }            }
6535          else          else
6536  #endif  #endif
6537            {          /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE
6538            so that it works in DFA mode and in lookbehinds. */
6539    
6540              {
6541            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
6542            *code++ = -c;            *code++ = (!utf8 && c == -ESC_C)? OP_ALLANY : -c;
6543            }            }
6544          }          }
6545        continue;        continue;
# Line 6335  for (;; ptr++) Line 6614  for (;; ptr++)
6614        else firstbyte = reqbyte = REQ_NONE;        else firstbyte = reqbyte = REQ_NONE;
6615        }        }
6616    
6617      /* firstbyte was previously set; we can set reqbyte only the length is      /* firstbyte was previously set; we can set reqbyte only if the length is
6618      1 or the matching is caseful. */      1 or the matching is caseful. */
6619    
6620      else      else
# Line 6394  Returns:         TRUE on success Line 6673  Returns:         TRUE on success
6673  */  */
6674    
6675  static BOOL  static BOOL
6676  compile_regex(int options, uschar **codeptr, const uschar **ptrptr,  compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
6677    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
6678    int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
6679    compile_data *cd, int *lengthptr)    compile_data *cd, int *lengthptr)
6680  {  {
6681  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
6682  uschar *code = *codeptr;  pcre_uchar *code = *codeptr;
6683  uschar *last_branch = code;  pcre_uchar *last_branch = code;
6684  uschar *start_bracket = code;  pcre_uchar *start_bracket = code;
6685  uschar *reverse_count = NULL;  pcre_uchar *reverse_count = NULL;
6686  open_capitem capitem;  open_capitem capitem;
6687  int capnumber = 0;  int capnumber = 0;
6688  int firstbyte, reqbyte;  int firstbyte, reqbyte;
# Line 6552  for (;;) Line 6831  for (;;)
6831          }          }
6832        else if (fixed_length < 0)        else if (fixed_length < 0)
6833          {          {
6834          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;          *errorcodeptr = (fixed_length == -2)? ERR36 :
6835                            (fixed_length == -4)? ERR70: ERR25;
6836          *ptrptr = ptr;          *ptrptr = ptr;
6837          return FALSE;          return FALSE;
6838          }          }
# Line 6699  Returns:     TRUE or FALSE Line 6979  Returns:     TRUE or FALSE
6979  */  */
6980    
6981  static BOOL  static BOOL
6982  is_anchored(register const uschar *code, unsigned int bracket_map,  is_anchored(register const pcre_uchar *code, unsigned int bracket_map,
6983    unsigned int backref_map)    unsigned int backref_map)
6984  {  {
6985  do {  do {
6986     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
6987       FALSE);       code + _pcre_OP_lengths[*code], FALSE);
6988     register int op = *scode;     register int op = *scode;
6989    
6990     /* Non-capturing brackets */     /* Non-capturing brackets */
# Line 6727  do { Line 7007  do {
7007    
7008     /* Other brackets */     /* Other brackets */
7009    
7010     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC ||
7011                op == OP_COND)
7012       {       {
7013       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;
7014       }       }
# Line 6775  Returns:         TRUE or FALSE Line 7056  Returns:         TRUE or FALSE
7056  */  */
7057    
7058  static BOOL  static BOOL
7059  is_startline(const uschar *code, unsigned int bracket_map,  is_startline(const pcre_uchar *code, unsigned int bracket_map,
7060    unsigned int backref_map)    unsigned int backref_map)
7061  {  {
7062  do {  do {
7063     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
7064       FALSE);       code + _pcre_OP_lengths[*code], FALSE);
7065     register int op = *scode;     register int op = *scode;
7066    
7067     /* If we are at the start of a conditional assertion group, *both* the     /* If we are at the start of a conditional assertion group, *both* the
# Line 6831  do { Line 7112  do {
7112    
7113     /* Other brackets */     /* Other brackets */
7114    
7115     else if (op == OP_ASSERT || op == OP_ONCE)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC)
7116       {       {
7117       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
7118       }       }
# Line 6878  Returns:     -1 or the fixed first char Line 7159  Returns:     -1 or the fixed first char
7159  */  */
7160    
7161  static int  static int
7162  find_firstassertedchar(const uschar *code, BOOL inassert)  find_firstassertedchar(const pcre_uchar *code, BOOL inassert)
7163  {  {
7164  register int c = -1;  register int c = -1;
7165  do {  do {
7166     int d;     int d;
7167     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
7168               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0;               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0;
7169     const uschar *scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);     const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
7170         TRUE);
7171     register int op = *scode;     register int op = *scode;
7172    
7173     switch(op)     switch(op)
# Line 6901  do { Line 7183  do {
7183       case OP_SCBRAPOS:       case OP_SCBRAPOS:
7184       case OP_ASSERT:       case OP_ASSERT:
7185       case OP_ONCE:       case OP_ONCE:
7186         case OP_ONCE_NC:
7187       case OP_COND:       case OP_COND:
7188       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)
7189         return -1;         return -1;
# Line 6964  Returns:        pointer to compiled data Line 7247  Returns:        pointer to compiled data
7247                  with errorptr and erroroffset set                  with errorptr and erroroffset set
7248  */  */
7249    
7250    #ifndef COMPILE_PCRE16
7251  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7252  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
7253    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
7254    #else
7255    PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7256    pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
7257      int *erroroffset, const unsigned char *tables)
7258    #endif
7259  {  {
7260    #ifndef COMPILE_PCRE16
7261  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);  return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7262    #else
7263    return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
7264    #endif
7265  }  }
7266    
7267    
7268    #ifndef COMPILE_PCRE16
7269  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7270  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
7271    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
7272    #else
7273    PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
7274    pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
7275      const char **errorptr, int *erroroffset, const unsigned char *tables)
7276    #endif
7277  {  {
7278  real_pcre *re;  real_pcre *re;
7279  int length = 1;  /* For final END opcode */  int length = 1;  /* For final END opcode */
# Line 6983  int errorcode = 0; Line 7282  int errorcode = 0;
7282  int skipatstart = 0;  int skipatstart = 0;
7283  BOOL utf8;  BOOL utf8;
7284  size_t size;  size_t size;
7285  uschar *code;  pcre_uchar *code;
7286  const uschar *codestart;  const pcre_uchar *codestart;
7287  const uschar *ptr;  const pcre_uchar *ptr;
7288  compile_data compile_block;  compile_data compile_block;
7289  compile_data *cd = &compile_block;  compile_data *cd = &compile_block;
7290    
# Line 6995  as soon as possible, so that a fairly la Line 7294  as soon as possible, so that a fairly la
7294  this purpose. The same space is used in the second phase for remembering where  this purpose. The same space is used in the second phase for remembering where
7295  to fill in forward references to subpatterns. */  to fill in forward references to subpatterns. */
7296    
7297  uschar cworkspace[COMPILE_WORK_SIZE];  pcre_uchar cworkspace[COMPILE_WORK_SIZE];
7298    
7299  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
7300    
7301  ptr = (const uschar *)pattern;  ptr = (const pcre_uchar *)pattern;
7302    
7303  /* We can't pass back an error message if errorptr is NULL; I guess the best we  /* We can't pass back an error message if errorptr is NULL; I guess the best we
7304  can do is just return NULL, but we can set a code value if there is a code  can do is just return NULL, but we can set a code value if there is a code
# Line 7088  not used here. */ Line 7387  not used here. */
7387    
7388  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
7389  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7390       (errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0)       (errorcode = _pcre_valid_utf8((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
7391    {    {
7392    errorcode = ERR44;    errorcode = ERR44;
7393    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
# Line 7186  cd->name_table = NULL; Line 7485  cd->name_table = NULL;
7485  cd->start_workspace = cworkspace;  cd->start_workspace = cworkspace;
7486  cd->start_code = cworkspace;  cd->start_code = cworkspace;
7487  cd->hwm = cworkspace;  cd->hwm = cworkspace;
7488  cd->start_pattern = (const uschar *)pattern;  cd->start_pattern = (const pcre_uchar *)pattern;
7489  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + strlen(pattern));
7490  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7491  cd->external_options = options;  cd->external_options = options;
7492  cd->external_flags = 0;  cd->external_flags = 0;
# Line 7260  cd->final_bracount = cd->bracount;  /* S Line 7559  cd->final_bracount = cd->bracount;  /* S
7559  cd->assert_depth = 0;  cd->assert_depth = 0;
7560  cd->bracount = 0;  cd->bracount = 0;
7561  cd->names_found = 0;  cd->names_found = 0;
7562  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
7563  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
7564  cd->start_code = codestart;  cd->start_code = codestart;
7565  cd->hwm = cworkspace;  cd->hwm = cworkspace;
# Line 7273  cd->open_caps = NULL; Line 7572  cd->open_caps = NULL;
7572  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
7573  of the function here. */  of the function here. */
7574    
7575  ptr = (const uschar *)pattern + skipatstart;  ptr = (const pcre_uchar *)pattern + skipatstart;
7576  code = (uschar *)codestart;  code = (pcre_uchar *)codestart;
7577  *code = OP_BRA;  *code = OP_BRA;
7578  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
7579    &firstbyte, &reqbyte, NULL, cd, NULL);    &firstbyte, &reqbyte, NULL, cd, NULL);
# Line 7282  re->top_bracket = cd->bracount; Line 7581  re->top_bracket = cd->bracount;
7581  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
7582  re->flags = cd->external_flags;  re->flags = cd->external_flags;
7583    
7584  if (cd->had_accept) reqbyte = -1;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqbyte = REQ_NONE;   /* Must disable after (*ACCEPT) */
7585    
7586  /* If not reached end of pattern on success, there's an excess bracket. */  /* If not reached end of pattern on success, there's an excess bracket. */
7587    
# Line 7302  if (code - codestart > length) errorcode Line 7601  if (code - codestart > length) errorcode
7601  while (errorcode == 0 && cd->hwm > cworkspace)  while (errorcode == 0 && cd->hwm > cworkspace)
7602    {    {
7603    int offset, recno;    int offset, recno;
7604    const uschar *groupptr;    const pcre_uchar *groupptr;
7605    cd->hwm -= LINK_SIZE;    cd->hwm -= LINK_SIZE;
7606    offset = GET(cd->hwm, 0);    offset = GET(cd->hwm, 0);
7607    recno = GET(codestart, offset);    recno = GET(codestart, offset);
7608    groupptr = _pcre_find_bracket(codestart, utf8, recno);    groupptr = _pcre_find_bracket(codestart, utf8, recno);
7609    if (groupptr == NULL) errorcode = ERR53;    if (groupptr == NULL) errorcode = ERR53;
7610      else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));      else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));
7611    }    }
7612    
7613  /* Give an error if there's back reference to a non-existent capturing  /* Give an error if there's back reference to a non-existent capturing
# Line 7326  length, and set their lengths. */ Line 7625  length, and set their lengths. */
7625    
7626  if (cd->check_lookbehind)  if (cd->check_lookbehind)
7627    {    {
7628    uschar *cc = (uschar *)codestart;    pcre_uchar *cc = (pcre_uchar *)codestart;
7629    
7630    /* Loop, searching for OP_REVERSE items, and process those that do not have    /* Loop, searching for OP_REVERSE items, and process those that do not have
7631    their length set. (Actually, it will also re-process any that have a length    their length set. (Actually, it will also re-process any that have a length
7632    of zero, but that is a pathological case, and it does no harm.) When we find    of zero, but that is a pathological case, and it does no harm.) When we find
7633    one, we temporarily terminate the branch it is in while we scan it. */    one, we temporarily terminate the branch it is in while we scan it. */
7634    
7635    for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);    for (cc = (pcre_uchar *)_pcre_find_bracket(codestart, utf8, -1);
7636         cc != NULL;         cc != NULL;
7637         cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))         cc = (pcre_uchar *)_pcre_find_bracket(cc, utf8, -1))
7638      {      {
7639      if (GET(cc, 1) == 0)      if (GET(cc, 1) == 0)
7640        {        {
7641        int fixed_length;        int fixed_length;
7642        uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);        pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
7643        int end_op = *be;        int end_op = *be;
7644        *be = OP_END;        *be = OP_END;
7645        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
# Line 7349  if (cd->check_lookbehind) Line 7648  if (cd->check_lookbehind)
7648        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
7649        if (fixed_length < 0)        if (fixed_length < 0)
7650          {          {
7651          errorcode = (fixed_length == -2)? ERR36 : ERR25;          errorcode = (fixed_length == -2)? ERR36 :
7652                        (fixed_length == -4)? ERR70 : ERR25;
7653          break;          break;
7654          }          }
7655        PUT(cc, 1, fixed_length);        PUT(cc, 1, fixed_length);
# Line 7364  if (errorcode != 0) Line 7664  if (errorcode != 0)
7664    {    {
7665    (pcre_free)(re);    (pcre_free)(re);
7666    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
7667    *erroroffset = (int)(ptr - (const uschar *)pattern);    *erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
7668    PCRE_EARLY_ERROR_RETURN2:    PCRE_EARLY_ERROR_RETURN2:
7669    *errorptr = find_error_text(errorcode);    *errorptr = find_error_text(errorcode);
7670    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
# Line 7450  if (code - codestart > length) Line 7750  if (code - codestart > length)
7750    {    {
7751    (pcre_free)(re);    (pcre_free)(re);
7752    *errorptr = find_error_text(ERR23);    *errorptr = find_error_text(ERR23);
7753    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (pcre_uchar *)pattern;
7754    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
7755    return NULL;    return NULL;
7756    }    }

Legend:
Removed from v.654  
changed lines
  Added in v.759

  ViewVC Help
Powered by ViewVC 1.1.5