/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

Old: code/trunk/pcre_compile.c revision 716 by ph10, Tue Oct 4 16:38:05 2011 UTC | New: code/branches/pcre16/pcre_compile.c revision 756 by ph10, Mon Nov 21 10:48:42 2011 UTC
# Line 231  static const char posix_names[] = Line 231  static const char posix_names[] =
231    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
232    STRING_word0  STRING_xdigit;    STRING_word0  STRING_xdigit;
233    
234  static const uschar posix_name_lengths[] = {  static const pcre_uint8 posix_name_lengths[] = {
235    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
236    
237  /* Table of class bit maps for each POSIX class. Each class is formed from a  /* Table of class bit maps for each POSIX class. Each class is formed from a
# Line 266  substitutes must be in the order of the Line 266  substitutes must be in the order of the
266  both positive and negative cases. NULL means no substitute. */  both positive and negative cases. NULL means no substitute. */
267    
268  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
269  static const uschar *substitutes[] = {  static const pcre_uchar literal_PNd[]  = { '\\', 'P', '{', 'N', 'd', '}', '\0' };
270    (uschar *)"\\P{Nd}",    /* \D */  static const pcre_uchar literal_pNd[]  = { '\\', 'p', '{', 'N', 'd', '}', '\0' };
271    (uschar *)"\\p{Nd}",    /* \d */  static const pcre_uchar literal_PXsp[] = { '\\', 'P', '{', 'X', 's', 'p', '}', '\0' };
272    (uschar *)"\\P{Xsp}",   /* \S */       /* NOTE: Xsp is Perl space */  static const pcre_uchar literal_pXsp[] = { '\\', 'p', '{', 'X', 's', 'p', '}', '\0' };
273    (uschar *)"\\p{Xsp}",   /* \s */  static const pcre_uchar literal_PXwd[] = { '\\', 'P', '{', 'X', 'w', 'd', '}', '\0' };
274    (uschar *)"\\P{Xwd}",   /* \W */  static const pcre_uchar literal_pXwd[] = { '\\', 'p', '{', 'X', 'w', 'd', '}', '\0' };
275    (uschar *)"\\p{Xwd}"    /* \w */  
276    static const pcre_uchar *substitutes[] = {
277      literal_PNd,           /* \D */
278      literal_pNd,           /* \d */
279      literal_PXsp,          /* \S */       /* NOTE: Xsp is Perl space */
280      literal_pXsp,          /* \s */
281      literal_PXwd,          /* \W */
282      literal_pXwd           /* \w */
283  };  };
284    
285  static const uschar *posix_substitutes[] = {  static const pcre_uchar literal_pL[] =   { '\\', 'p', '{', 'L', '}', '\0' };
286    (uschar *)"\\p{L}",     /* alpha */  static const pcre_uchar literal_pLl[] =  { '\\', 'p', '{', 'L', 'l', '}', '\0' };
287    (uschar *)"\\p{Ll}",    /* lower */  static const pcre_uchar literal_pLu[] =  { '\\', 'p', '{', 'L', 'u', '}', '\0' };
288    (uschar *)"\\p{Lu}",    /* upper */  static const pcre_uchar literal_pXan[] = { '\\', 'p', '{', 'X', 'a', 'n', '}', '\0' };
289    (uschar *)"\\p{Xan}",   /* alnum */  static const pcre_uchar literal_h[] =    { '\\', 'h', '\0' };
290    NULL,                   /* ascii */  static const pcre_uchar literal_pXps[] = { '\\', 'p', '{', 'X', 'p', 's', '}', '\0' };
291    (uschar *)"\\h",        /* blank */  static const pcre_uchar literal_PL[] =   { '\\', 'P', '{', 'L', '}', '\0' };
292    NULL,                   /* cntrl */  static const pcre_uchar literal_PLl[] =  { '\\', 'P', '{', 'L', 'l', '}', '\0' };
293    (uschar *)"\\p{Nd}",    /* digit */  static const pcre_uchar literal_PLu[] =  { '\\', 'P', '{', 'L', 'u', '}', '\0' };
294    NULL,                   /* graph */  static const pcre_uchar literal_PXan[] = { '\\', 'P', '{', 'X', 'a', 'n', '}', '\0' };
295    NULL,                   /* print */  static const pcre_uchar literal_H[] =    { '\\', 'H', '\0' };
296    NULL,                   /* punct */  static const pcre_uchar literal_PXps[] = { '\\', 'P', '{', 'X', 'p', 's', '}', '\0' };
297    (uschar *)"\\p{Xps}",   /* space */    /* NOTE: Xps is POSIX space */  
298    (uschar *)"\\p{Xwd}",   /* word */  static const pcre_uchar *posix_substitutes[] = {
299    NULL,                   /* xdigit */    literal_pL,            /* alpha */
300      literal_pLl,           /* lower */
301      literal_pLu,           /* upper */
302      literal_pXan,          /* alnum */
303      NULL,                  /* ascii */
304      literal_h,             /* blank */
305      NULL,                  /* cntrl */
306      literal_pNd,           /* digit */
307      NULL,                  /* graph */
308      NULL,                  /* print */
309      NULL,                  /* punct */
310      literal_pXps,          /* space */    /* NOTE: Xps is POSIX space */
311      literal_pXwd,          /* word */
312      NULL,                  /* xdigit */
313    /* Negated cases */    /* Negated cases */
314    (uschar *)"\\P{L}",     /* ^alpha */    literal_PL,            /* ^alpha */
315    (uschar *)"\\P{Ll}",    /* ^lower */    literal_PLl,           /* ^lower */
316    (uschar *)"\\P{Lu}",    /* ^upper */    literal_PLu,           /* ^upper */
317    (uschar *)"\\P{Xan}",   /* ^alnum */    literal_PXan,          /* ^alnum */
318    NULL,                   /* ^ascii */    NULL,                  /* ^ascii */
319    (uschar *)"\\H",        /* ^blank */    literal_H,             /* ^blank */
320    NULL,                   /* ^cntrl */    NULL,                  /* ^cntrl */
321    (uschar *)"\\P{Nd}",    /* ^digit */    literal_PNd,           /* ^digit */
322    NULL,                   /* ^graph */    NULL,                  /* ^graph */
323    NULL,                   /* ^print */    NULL,                  /* ^print */
324    NULL,                   /* ^punct */    NULL,                  /* ^punct */
325    (uschar *)"\\P{Xps}",   /* ^space */   /* NOTE: Xps is POSIX space */    literal_PXps,          /* ^space */   /* NOTE: Xps is POSIX space */
326    (uschar *)"\\P{Xwd}",   /* ^word */    literal_PXwd,          /* ^word */
327    NULL                    /* ^xdigit */    NULL                   /* ^xdigit */
328  };  };
329  #define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))  #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
330  #endif  #endif
331    
332  #define STRING(a)  # a  #define STRING(a)  # a
# Line 410  static const char error_texts[] = Line 430  static const char error_texts[] =
430    "this version of PCRE is not compiled with PCRE_UCP support\0"    "this version of PCRE is not compiled with PCRE_UCP support\0"
431    "\\c must be followed by an ASCII character\0"    "\\c must be followed by an ASCII character\0"
432    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
433      /* 70 */
434      "internal error: unknown opcode in find_fixedlength()\0"
435    ;    ;
436    
437  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 546  static const unsigned char ebcdic_charta Line 568  static const unsigned char ebcdic_charta
568  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
569    
570  static BOOL  static BOOL
571    compile_regex(int, uschar **, const uschar **, int *, BOOL, BOOL, int, int,    compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
572      int *, int *, branch_chain *, compile_data *, int *);      int *, int *, branch_chain *, compile_data *, int *);
573    
574    
# Line 593  Returns:    TRUE or FALSE Line 615  Returns:    TRUE or FALSE
615  */  */
616    
617  static BOOL  static BOOL
618  is_counted_repeat(const uschar *p)  is_counted_repeat(const pcre_uchar *p)
619  {  {
620  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
621  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
# Line 635  Returns:         zero or positive => a d Line 657  Returns:         zero or positive => a d
657  */  */
658    
659  static int  static int
660  check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,  check_escape(const pcre_uchar **ptrptr, int *errorcodeptr, int bracount,
661    int options, BOOL isclass)    int options, BOOL isclass)
662  {  {
663  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
664  const uschar *ptr = *ptrptr + 1;  const pcre_uchar *ptr = *ptrptr + 1;
665  int c, i;  int c, i;
666    
667  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */  GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */
# Line 666  else if ((i = escapes[c - 0x48]) != 0) Line 688  else if ((i = escapes[c - 0x48]) != 0)
688    
689  else  else
690    {    {
691    const uschar *oldptr;    const pcre_uchar *oldptr;
692    BOOL braced, negated;    BOOL braced, negated;
693    
694    switch (c)    switch (c)
# Line 676  else Line 698  else
698    
699      case CHAR_l:      case CHAR_l:
700      case CHAR_L:      case CHAR_L:
701        *errorcodeptr = ERR37;
702        break;
703    
704      case CHAR_u:      case CHAR_u:
705        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
706          {
707          /* In JavaScript, \u must be followed by four hexadecimal digits.
708          Otherwise it is treated as a literal lowercase letter u. */
709          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
710               && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
711            {
712            c = 0;
713            for (i = 0; i < 4; ++i)
714              {
715              register int cc = *(++ptr);
716    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
717              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
718              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
719    #else           /* EBCDIC coding */
720              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
721              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
722    #endif
723              }
724            }
725          }
726        else
727          *errorcodeptr = ERR37;
728        break;
729    
730      case CHAR_U:      case CHAR_U:
731      *errorcodeptr = ERR37;      /* In JavaScript, \U is an uppercase U letter. */
732        if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
733      break;      break;
734    
735      /* In a character class, \g is just a literal "g". Outside a character      /* In a character class, \g is just a literal "g". Outside a character
# Line 710  else Line 761  else
761    
762      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
763        {        {
764        const uschar *p;        const pcre_uchar *p;
765        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
766          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
767        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
# Line 828  else Line 879  else
879      treated as a data character. */      treated as a data character. */
880    
881      case CHAR_x:      case CHAR_x:
882        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
883          {
884          /* In JavaScript, \x must be followed by two hexadecimal digits.
885          Otherwise it is treated as a literal lowercase letter x. */
886          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
887            {
888            c = 0;
889            for (i = 0; i < 2; ++i)
890              {
891              register int cc = *(++ptr);
892    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
893              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
894              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
895    #else           /* EBCDIC coding */
896              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
897              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
898    #endif
899              }
900            }
901          break;
902          }
903    
904      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
905        {        {
906        const uschar *pt = ptr + 2;        const pcre_uchar *pt = ptr + 2;
907        int count = 0;        int count = 0;
908    
909        c = 0;        c = 0;
# Line 961  Returns:         type value from ucp_typ Line 1034  Returns:         type value from ucp_typ
1034  */  */
1035    
1036  static int  static int
1037  get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
1038  {  {
1039  int c, i, bot, top;  int c, i, bot, top;
1040  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
1041  char name[32];  char name[32];
1042    
1043  c = *(++ptr);  c = *(++ptr);
# Line 1053  Returns:         pointer to '}' on succe Line 1126  Returns:         pointer to '}' on succe
1126                   current ptr on error, with errorcodeptr set non-zero                   current ptr on error, with errorcodeptr set non-zero
1127  */  */
1128    
1129  static const uschar *  static const pcre_uchar *
1130  read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr)  read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr)
1131  {  {
1132  int min = 0;  int min = 0;
1133  int max = -1;  int max = -1;
# Line 1139  Returns:       the number of the named s Line 1212  Returns:       the number of the named s
1212  */  */
1213    
1214  static int  static int
1215  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn,
1216    BOOL xmode, BOOL utf8, int *count)    BOOL xmode, BOOL utf8, int *count)
1217  {  {
1218  uschar *ptr = *ptrptr;  pcre_uchar *ptr = *ptrptr;
1219  int start_count = *count;  int start_count = *count;
1220  int hwm_count = start_count;  int hwm_count = start_count;
1221  BOOL dup_parens = FALSE;  BOOL dup_parens = FALSE;
# Line 1209  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1282  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1282          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1283        {        {
1284        int term;        int term;
1285        const uschar *thisname;        const pcre_uchar *thisname;
1286        *count += 1;        *count += 1;
1287        if (name == NULL && *count == lorn) return *count;        if (name == NULL && *count == lorn) return *count;
1288        term = *ptr++;        term = *ptr++;
# Line 1372  Returns:       the number of the found s Line 1445  Returns:       the number of the found s
1445  */  */
1446    
1447  static int  static int
1448  find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode,  find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode,
1449    BOOL utf8)    BOOL utf8)
1450  {  {
1451  uschar *ptr = (uschar *)cd->start_pattern;  pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern;
1452  int count = 0;  int count = 0;
1453  int rc;  int rc;
1454    
# Line 1413  Arguments: Line 1486  Arguments:
1486  Returns:       pointer to the first significant opcode  Returns:       pointer to the first significant opcode
1487  */  */
1488    
1489  static const uschar*  static const pcre_uchar*
1490  first_significant_code(const uschar *code, BOOL skipassert)  first_significant_code(const pcre_uchar *code, BOOL skipassert)
1491  {  {
1492  for (;;)  for (;;)
1493    {    {
# Line 1475  Arguments: Line 1548  Arguments:
1548    
1549  Returns:   the fixed length,  Returns:   the fixed length,
1550               or -1 if there is no fixed length,               or -1 if there is no fixed length,
1551               or -2 if \C was encountered               or -2 if \C was encountered (in UTF-8 mode only)
1552               or -3 if an OP_RECURSE item was encountered and atend is FALSE               or -3 if an OP_RECURSE item was encountered and atend is FALSE
1553                 or -4 if an unknown opcode was encountered (internal error)
1554  */  */
1555    
1556  static int  static int
1557  find_fixedlength(uschar *code, BOOL utf8, BOOL atend, compile_data *cd)  find_fixedlength(pcre_uchar *code, BOOL utf8, BOOL atend, compile_data *cd)
1558  {  {
1559  int length = -1;  int length = -1;
1560    
1561  register int branchlength = 0;  register int branchlength = 0;
1562  register uschar *cc = code + 1 + LINK_SIZE;  register pcre_uchar *cc = code + 1 + LINK_SIZE;
1563    
1564  /* Scan along the opcodes for this branch. If we get to the end of the  /* Scan along the opcodes for this branch. If we get to the end of the
1565  branch, check the length against that of the other branches. */  branch, check the length against that of the other branches. */
# Line 1493  branch, check the length against that of Line 1567  branch, check the length against that of
1567  for (;;)  for (;;)
1568    {    {
1569    int d;    int d;
1570    uschar *ce, *cs;    pcre_uchar *ce, *cs;
1571    register int op = *cc;    register int op = *cc;
1572    switch (op)    switch (op)
1573      {      {
1574      /* We only need to continue for OP_CBRA (normal capturing bracket) and      /* We only need to continue for OP_CBRA (normal capturing bracket) and
1575      OP_BRA (normal non-capturing bracket) because the other variants of these      OP_BRA (normal non-capturing bracket) because the other variants of these
1576      opcodes are all concerned with unlimited repeated groups, which of course      opcodes are all concerned with unlimited repeated groups, which of course
1577      are not of fixed length. They will cause a -1 response from the default      are not of fixed length. */
     case of this switch. */  
1578    
1579      case OP_CBRA:      case OP_CBRA:
1580      case OP_BRA:      case OP_BRA:
1581      case OP_ONCE:      case OP_ONCE:
1582        case OP_ONCE_NC:
1583      case OP_COND:      case OP_COND:
1584      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);
1585      if (d < 0) return d;      if (d < 0) return d;
# Line 1514  for (;;) Line 1588  for (;;)
1588      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1589      break;      break;
1590    
1591      /* Reached end of a branch; if it's a ket it is the end of a nested      /* Reached end of a branch; if it's a ket it is the end of a nested call.
1592      call. If it's ALT it is an alternation in a nested call. If it is      If it's ALT it is an alternation in a nested call. An ACCEPT is effectively
1593      END it's the end of the outer call. All can be handled by the same code.      an ALT. If it is END it's the end of the outer call. All can be handled by
1594      Note that we must not include the OP_KETRxxx opcodes here, because they      the same code. Note that we must not include the OP_KETRxxx opcodes here,
1595      all imply an unlimited repeat. */      because they all imply an unlimited repeat. */
1596    
1597      case OP_ALT:      case OP_ALT:
1598      case OP_KET:      case OP_KET:
1599      case OP_END:      case OP_END:
1600        case OP_ACCEPT:
1601        case OP_ASSERT_ACCEPT:
1602      if (length < 0) length = branchlength;      if (length < 0) length = branchlength;
1603        else if (length != branchlength) return -1;        else if (length != branchlength) return -1;
1604      if (*cc != OP_ALT) return length;      if (*cc != OP_ALT) return length;
# Line 1536  for (;;) Line 1612  for (;;)
1612    
1613      case OP_RECURSE:      case OP_RECURSE:
1614      if (!atend) return -3;      if (!atend) return -3;
1615      cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1616      do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
1617      if (cc > cs && cc < ce) return -1;                /* Recursion */      if (cc > cs && cc < ce) return -1;                    /* Recursion */
1618      d = find_fixedlength(cs + 2, utf8, atend, cd);      d = find_fixedlength(cs + 2, utf8, atend, cd);
1619      if (d < 0) return d;      if (d < 0) return d;
1620      branchlength += d;      branchlength += d;
# Line 1556  for (;;) Line 1632  for (;;)
1632    
1633      /* Skip over things that don't match chars */      /* Skip over things that don't match chars */
1634    
1635      case OP_REVERSE:      case OP_MARK:
1636      case OP_CREF:      case OP_PRUNE_ARG:
1637      case OP_NCREF:      case OP_SKIP_ARG:
1638      case OP_RREF:      case OP_THEN_ARG:
1639      case OP_NRREF:      cc += cc[1] + _pcre_OP_lengths[*cc];
1640      case OP_DEF:      break;
1641    
1642      case OP_CALLOUT:      case OP_CALLOUT:
     case OP_SOD:  
     case OP_SOM:  
     case OP_SET_SOM:  
     case OP_EOD:  
     case OP_EODN:  
1643      case OP_CIRC:      case OP_CIRC:
1644      case OP_CIRCM:      case OP_CIRCM:
1645        case OP_CLOSE:
1646        case OP_COMMIT:
1647        case OP_CREF:
1648        case OP_DEF:
1649      case OP_DOLL:      case OP_DOLL:
1650      case OP_DOLLM:      case OP_DOLLM:
1651        case OP_EOD:
1652        case OP_EODN:
1653        case OP_FAIL:
1654        case OP_NCREF:
1655        case OP_NRREF:
1656      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
1657        case OP_PRUNE:
1658        case OP_REVERSE:
1659        case OP_RREF:
1660        case OP_SET_SOM:
1661        case OP_SKIP:
1662        case OP_SOD:
1663        case OP_SOM:
1664        case OP_THEN:
1665      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
1666      cc += _pcre_OP_lengths[*cc];      cc += _pcre_OP_lengths[*cc];
1667      break;      break;
# Line 1594  for (;;) Line 1683  for (;;)
1683      need to skip over a multibyte character in UTF8 mode.  */      need to skip over a multibyte character in UTF8 mode.  */
1684    
1685      case OP_EXACT:      case OP_EXACT:
1686        case OP_EXACTI:
1687        case OP_NOTEXACT:
1688        case OP_NOTEXACTI:
1689      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1690      cc += 4;      cc += 4;
1691  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1614  for (;;) Line 1706  for (;;)
1706      cc += 2;      cc += 2;
1707      /* Fall through */      /* Fall through */
1708    
1709        case OP_HSPACE:
1710        case OP_VSPACE:
1711        case OP_NOT_HSPACE:
1712        case OP_NOT_VSPACE:
1713      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1714      case OP_DIGIT:      case OP_DIGIT:
1715      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
# Line 1626  for (;;) Line 1722  for (;;)
1722      cc++;      cc++;
1723      break;      break;
1724    
1725      /* The single-byte matcher isn't allowed */      /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
1726        otherwise \C is coded as OP_ALLANY. */
1727    
1728      case OP_ANYBYTE:      case OP_ANYBYTE:
1729      return -2;      return -2;
# Line 1645  for (;;) Line 1742  for (;;)
1742    
1743      switch (*cc)      switch (*cc)
1744        {        {
1745          case OP_CRPLUS:
1746          case OP_CRMINPLUS:
1747        case OP_CRSTAR:        case OP_CRSTAR:
1748        case OP_CRMINSTAR:        case OP_CRMINSTAR:
1749        case OP_CRQUERY:        case OP_CRQUERY:
# Line 1665  for (;;) Line 1764  for (;;)
1764    
1765      /* Anything else is variable length */      /* Anything else is variable length */
1766    
1767      default:      case OP_ANYNL:
1768        case OP_BRAMINZERO:
1769        case OP_BRAPOS:
1770        case OP_BRAPOSZERO:
1771        case OP_BRAZERO:
1772        case OP_CBRAPOS:
1773        case OP_EXTUNI:
1774        case OP_KETRMAX:
1775        case OP_KETRMIN:
1776        case OP_KETRPOS:
1777        case OP_MINPLUS:
1778        case OP_MINPLUSI:
1779        case OP_MINQUERY:
1780        case OP_MINQUERYI:
1781        case OP_MINSTAR:
1782        case OP_MINSTARI:
1783        case OP_MINUPTO:
1784        case OP_MINUPTOI:
1785        case OP_NOTMINPLUS:
1786        case OP_NOTMINPLUSI:
1787        case OP_NOTMINQUERY:
1788        case OP_NOTMINQUERYI:
1789        case OP_NOTMINSTAR:
1790        case OP_NOTMINSTARI:
1791        case OP_NOTMINUPTO:
1792        case OP_NOTMINUPTOI:
1793        case OP_NOTPLUS:
1794        case OP_NOTPLUSI:
1795        case OP_NOTPOSPLUS:
1796        case OP_NOTPOSPLUSI:
1797        case OP_NOTPOSQUERY:
1798        case OP_NOTPOSQUERYI:
1799        case OP_NOTPOSSTAR:
1800        case OP_NOTPOSSTARI:
1801        case OP_NOTPOSUPTO:
1802        case OP_NOTPOSUPTOI:
1803        case OP_NOTQUERY:
1804        case OP_NOTQUERYI:
1805        case OP_NOTSTAR:
1806        case OP_NOTSTARI:
1807        case OP_NOTUPTO:
1808        case OP_NOTUPTOI:
1809        case OP_PLUS:
1810        case OP_PLUSI:
1811        case OP_POSPLUS:
1812        case OP_POSPLUSI:
1813        case OP_POSQUERY:
1814        case OP_POSQUERYI:
1815        case OP_POSSTAR:
1816        case OP_POSSTARI:
1817        case OP_POSUPTO:
1818        case OP_POSUPTOI:
1819        case OP_QUERY:
1820        case OP_QUERYI:
1821        case OP_REF:
1822        case OP_REFI:
1823        case OP_SBRA:
1824        case OP_SBRAPOS:
1825        case OP_SCBRA:
1826        case OP_SCBRAPOS:
1827        case OP_SCOND:
1828        case OP_SKIPZERO:
1829        case OP_STAR:
1830        case OP_STARI:
1831        case OP_TYPEMINPLUS:
1832        case OP_TYPEMINQUERY:
1833        case OP_TYPEMINSTAR:
1834        case OP_TYPEMINUPTO:
1835        case OP_TYPEPLUS:
1836        case OP_TYPEPOSPLUS:
1837        case OP_TYPEPOSQUERY:
1838        case OP_TYPEPOSSTAR:
1839        case OP_TYPEPOSUPTO:
1840        case OP_TYPEQUERY:
1841        case OP_TYPESTAR:
1842        case OP_TYPEUPTO:
1843        case OP_UPTO:
1844        case OP_UPTOI:
1845      return -1;      return -1;
1846    
1847        /* Catch unrecognized opcodes so that when new ones are added they
1848        are not forgotten, as has happened in the past. */
1849    
1850        default:
1851        return -4;
1852      }      }
1853    }    }
1854  /* Control never gets here */  /* Control never gets here */
# Line 1693  Arguments: Line 1875  Arguments:
1875  Returns:      pointer to the opcode for the bracket, or NULL if not found  Returns:      pointer to the opcode for the bracket, or NULL if not found
1876  */  */
1877    
1878  const uschar *  const pcre_uchar *
1879  _pcre_find_bracket(const uschar *code, BOOL utf8, int number)  _pcre_find_bracket(const pcre_uchar *code, BOOL utf8, int number)
1880  {  {
1881  for (;;)  for (;;)
1882    {    {
# Line 1712  for (;;) Line 1894  for (;;)
1894    
1895    else if (c == OP_REVERSE)    else if (c == OP_REVERSE)
1896      {      {
1897      if (number < 0) return (uschar *)code;      if (number < 0) return (pcre_uchar *)code;
1898      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1899      }      }
1900    
# Line 1722  for (;;) Line 1904  for (;;)
1904             c == OP_CBRAPOS || c == OP_SCBRAPOS)             c == OP_CBRAPOS || c == OP_SCBRAPOS)
1905      {      {
1906      int n = GET2(code, 1+LINK_SIZE);      int n = GET2(code, 1+LINK_SIZE);
1907      if (n == number) return (uschar *)code;      if (n == number) return (pcre_uchar *)code;
1908      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1909      }      }
1910    
# Line 1830  Arguments: Line 2012  Arguments:
2012  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found  Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found
2013  */  */
2014    
2015  static const uschar *  static const pcre_uchar *
2016  find_recurse(const uschar *code, BOOL utf8)  find_recurse(const pcre_uchar *code, BOOL utf8)
2017  {  {
2018  for (;;)  for (;;)
2019    {    {
# Line 1957  Returns:      TRUE if what is matched co Line 2139  Returns:      TRUE if what is matched co
2139  */  */
2140    
2141  static BOOL  static BOOL
2142  could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2143    compile_data *cd)    BOOL utf8, compile_data *cd)
2144  {  {
2145  register int c;  register int c;
2146  for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);  for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
2147       code < endcode;       code < endcode;
2148       code = first_significant_code(code + _pcre_OP_lengths[c], TRUE))       code = first_significant_code(code + _pcre_OP_lengths[c], TRUE))
2149    {    {
2150    const uschar *ccode;    const pcre_uchar *ccode;
2151    
2152    c = *code;    c = *code;
2153    
# Line 1988  for (code = first_significant_code(code Line 2170  for (code = first_significant_code(code
2170    
2171    if (c == OP_RECURSE)    if (c == OP_RECURSE)
2172      {      {
2173      const uschar *scode;      const pcre_uchar *scode;
2174      BOOL empty_branch;      BOOL empty_branch;
2175    
2176      /* Test for forward reference */      /* Test for forward reference */
# Line 2045  for (code = first_significant_code(code Line 2227  for (code = first_significant_code(code
2227    
2228    if (c == OP_BRA  || c == OP_BRAPOS ||    if (c == OP_BRA  || c == OP_BRAPOS ||
2229        c == OP_CBRA || c == OP_CBRAPOS ||        c == OP_CBRA || c == OP_CBRAPOS ||
2230        c == OP_ONCE || c == OP_COND)        c == OP_ONCE || c == OP_ONCE_NC ||
2231          c == OP_COND)
2232      {      {
2233      BOOL empty_branch;      BOOL empty_branch;
2234      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
# Line 2254  Returns:      TRUE if what is matched co Line 2437  Returns:      TRUE if what is matched co
2437  */  */
2438    
2439  static BOOL  static BOOL
2440  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
2441    BOOL utf8, compile_data *cd)    branch_chain *bcptr, BOOL utf8, compile_data *cd)
2442  {  {
2443  while (bcptr != NULL && bcptr->current_branch >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2444    {    {
# Line 2311  Returns:   TRUE or FALSE Line 2494  Returns:   TRUE or FALSE
2494  */  */
2495    
2496  static BOOL  static BOOL
2497  check_posix_syntax(const uschar *ptr, const uschar **endptr)  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
2498  {  {
2499  int terminator;          /* Don't combine these lines; the Solaris cc */  int terminator;          /* Don't combine these lines; the Solaris cc */
2500  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
# Line 2355  Returns:     a value representing the na Line 2538  Returns:     a value representing the na
2538  */  */
2539    
2540  static int  static int
2541  check_posix_name(const uschar *ptr, int len)  check_posix_name(const pcre_uchar *ptr, int len)
2542  {  {
2543  const char *pn = posix_names;  const char *pn = posix_names;
2544  register int yield = 0;  register int yield = 0;
# Line 2402  Returns:     nothing Line 2585  Returns:     nothing
2585  */  */
2586    
2587  static void  static void
2588  adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf8, compile_data *cd,
2589    uschar *save_hwm)    pcre_uchar *save_hwm)
2590  {  {
2591  uschar *ptr = group;  pcre_uchar *ptr = group;
2592    
2593  while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf8)) != NULL)
2594    {    {
2595    int offset;    int offset;
2596    uschar *hc;    pcre_uchar *hc;
2597    
2598    /* See if this recursion is on the forward reference list. If so, adjust the    /* See if this recursion is on the forward reference list. If so, adjust the
2599    reference. */    reference. */
# Line 2455  Arguments: Line 2638  Arguments:
2638  Returns:         new code pointer  Returns:         new code pointer
2639  */  */
2640    
2641  static uschar *  static pcre_uchar *
2642  auto_callout(uschar *code, const uschar *ptr, compile_data *cd)  auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd)
2643  {  {
2644  *code++ = OP_CALLOUT;  *code++ = OP_CALLOUT;
2645  *code++ = 255;  *code++ = 255;
2646  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */
2647  PUT(code, LINK_SIZE, 0);                       /* Default length */  PUT(code, LINK_SIZE, 0);                       /* Default length */
2648  return code + 2*LINK_SIZE;  return code + 2 * LINK_SIZE;
2649  }  }
2650    
2651    
# Line 2484  Returns:             nothing Line 2667  Returns:             nothing
2667  */  */
2668    
2669  static void  static void
2670  complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)  complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd)
2671  {  {
2672  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
2673  PUT(previous_callout, 2 + LINK_SIZE, length);  PUT(previous_callout, 2 + LINK_SIZE, length);
# Line 2622  Returns:        TRUE if possessifying is Line 2805  Returns:        TRUE if possessifying is
2805  */  */
2806    
2807  static BOOL  static BOOL
2808  check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr,  check_auto_possessive(const pcre_uchar *previous, BOOL utf8,
2809    int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
2810  {  {
2811  int c, next;  int c, next;
2812  int op_code = *previous++;  int op_code = *previous++;
# Line 3051  Returns:         TRUE on success Line 3234  Returns:         TRUE on success
3234  */  */
3235    
3236  static BOOL  static BOOL
3237  compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,  compile_branch(int *optionsptr, pcre_uchar **codeptr,
3238    int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    const pcre_uchar **ptrptr, int *errorcodeptr, int *firstbyteptr,
3239    int cond_depth, compile_data *cd, int *lengthptr)    int *reqbyteptr, branch_chain *bcptr, int cond_depth, compile_data *cd,
3240      int *lengthptr)
3241  {  {
3242  int repeat_type, op_type;  int repeat_type, op_type;
3243  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
# Line 3066  int options = *optionsptr; Line 3250  int options = *optionsptr;
3250  int after_manual_callout = 0;  int after_manual_callout = 0;
3251  int length_prevgroup = 0;  int length_prevgroup = 0;
3252  register int c;  register int c;
3253  register uschar *code = *codeptr;  register pcre_uchar *code = *codeptr;
3254  uschar *last_code = code;  pcre_uchar *last_code = code;
3255  uschar *orig_code = code;  pcre_uchar *orig_code = code;
3256  uschar *tempcode;  pcre_uchar *tempcode;
3257  BOOL inescq = FALSE;  BOOL inescq = FALSE;
3258  BOOL groupsetfirstbyte = FALSE;  BOOL groupsetfirstbyte = FALSE;
3259  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
3260  const uschar *tempptr;  const pcre_uchar *tempptr;
3261  const uschar *nestptr = NULL;  const pcre_uchar *nestptr = NULL;
3262  uschar *previous = NULL;  pcre_uchar *previous = NULL;
3263  uschar *previous_callout = NULL;  pcre_uchar *previous_callout = NULL;
3264  uschar *save_hwm = NULL;  pcre_uchar *save_hwm = NULL;
3265  uschar classbits[32];  pcre_uchar classbits[32];
3266    
3267  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
3268  must not do this for other options (e.g. PCRE_EXTENDED) because they may change  must not do this for other options (e.g. PCRE_EXTENDED) because they may change
# Line 3087  dynamically as we process the pattern. * Line 3271  dynamically as we process the pattern. *
3271  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3272  BOOL class_utf8;  BOOL class_utf8;
3273  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
3274  uschar *class_utf8data;  pcre_uint8 *class_utf8data;
3275  uschar *class_utf8data_base;  pcre_uint8 *class_utf8data_base;
3276  uschar utf8_char[6];  pcre_uint8 utf8_char[6];
3277  #else  #else
3278  BOOL utf8 = FALSE;  BOOL utf8 = FALSE;
3279  #endif  #endif
# Line 3142  for (;; ptr++) Line 3326  for (;; ptr++)
3326    int subfirstbyte;    int subfirstbyte;
3327    int terminator;    int terminator;
3328    int mclength;    int mclength;
3329    uschar mcbuffer[8];    int tempbracount;
3330      pcre_uchar mcbuffer[8];
3331    
3332    /* Get next byte in the pattern */    /* Get next byte in the pattern */
3333    
# Line 3189  for (;; ptr++) Line 3374  for (;; ptr++)
3374        }        }
3375    
3376      *lengthptr += (int)(code - last_code);      *lengthptr += (int)(code - last_code);
3377      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code - last_code),
3378          c));
3379    
3380      /* If "previous" is set and it is not at the start of the work space, move      /* If "previous" is set and it is not at the start of the work space, move
3381      it back to there, in order to avoid filling up the work space. Otherwise,      it back to there, in order to avoid filling up the work space. Otherwise,
# Line 3440  for (;; ptr++) Line 3626  for (;; ptr++)
3626      than 256), because in that case the compiled code doesn't use the bit map.      than 256), because in that case the compiled code doesn't use the bit map.
3627      */      */
3628    
3629      memset(classbits, 0, 32 * sizeof(uschar));      memset(classbits, 0, 32 * sizeof(pcre_uint8));
3630    
3631  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3632      class_utf8 = FALSE;                       /* No chars >= 256 */      class_utf8 = FALSE;                       /* No chars >= 256 */
# Line 3454  for (;; ptr++) Line 3640  for (;; ptr++)
3640    
3641      if (c != 0) do      if (c != 0) do
3642        {        {
3643        const uschar *oldptr;        const pcre_uchar *oldptr;
3644    
3645  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3646        if (utf8 && c > 127)        if (utf8 && c > 127)
# Line 3500  for (;; ptr++) Line 3686  for (;; ptr++)
3686          {          {
3687          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3688          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3689          register const uschar *cbits = cd->cbits;          register const pcre_uint8 *cbits = cd->cbits;
3690          uschar pbits[32];          pcre_uint8 pbits[32];
3691    
3692          if (ptr[1] != CHAR_COLON)          if (ptr[1] != CHAR_COLON)
3693            {            {
# Line 3556  for (;; ptr++) Line 3742  for (;; ptr++)
3742          /* Copy in the first table (always present) */          /* Copy in the first table (always present) */
3743    
3744          memcpy(pbits, cbits + posix_class_maps[posix_class],          memcpy(pbits, cbits + posix_class_maps[posix_class],
3745            32 * sizeof(uschar));            32 * sizeof(pcre_uint8));
3746    
3747          /* If there is a second table, add or remove it as required. */          /* If there is a second table, add or remove it as required. */
3748    
# Line 3618  for (;; ptr++) Line 3804  for (;; ptr++)
3804    
3805          if (c < 0)          if (c < 0)
3806            {            {
3807            register const uschar *cbits = cd->cbits;            register const pcre_uint8 *cbits = cd->cbits;
3808            class_charcount += 2;     /* Greater than 1 is what matters */            class_charcount += 2;     /* Greater than 1 is what matters */
3809    
3810            switch (-c)            switch (-c)
# Line 4261  for (;; ptr++) Line 4447  for (;; ptr++)
4447      past, but it no longer happens for non-repeated recursions. In fact, the      past, but it no longer happens for non-repeated recursions. In fact, the
4448      repeated ones could be re-implemented independently so as not to need this,      repeated ones could be re-implemented independently so as not to need this,
4449      but for the moment we rely on the code for repeating groups. */      but for the moment we rely on the code for repeating groups. */
4450    
4451      if (*previous == OP_RECURSE)      if (*previous == OP_RECURSE)
4452        {        {
4453        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
# Line 4303  for (;; ptr++) Line 4489  for (;; ptr++)
4489  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4490        if (utf8 && (code[-1] & 0x80) != 0)        if (utf8 && (code[-1] & 0x80) != 0)
4491          {          {
4492          uschar *lastchar = code - 1;          pcre_uchar *lastchar = code - 1;
4493          while((*lastchar & 0xc0) == 0x80) lastchar--;          while((*lastchar & 0xc0) == 0x80) lastchar--;
4494          c = code - lastchar;            /* Length of UTF-8 character */          c = code - lastchar;            /* Length of UTF-8 character */
4495          memcpy(utf8_char, lastchar, c); /* Save the char */          memcpy(utf8_char, lastchar, c); /* Save the char */
# Line 4365  for (;; ptr++) Line 4551  for (;; ptr++)
4551    
4552      else if (*previous < OP_EODN)      else if (*previous < OP_EODN)
4553        {        {
4554        uschar *oldcode;        pcre_uchar *oldcode;
4555        int prop_type, prop_value;        int prop_type, prop_value;
4556        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
4557        c = *previous;        c = *previous;
# Line 4587  for (;; ptr++) Line 4773  for (;; ptr++)
4773        {        {
4774        register int i;        register int i;
4775        int len = (int)(code - previous);        int len = (int)(code - previous);
4776        uschar *bralink = NULL;        pcre_uchar *bralink = NULL;
4777        uschar *brazeroptr = NULL;        pcre_uchar *brazeroptr = NULL;
4778    
4779        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
4780        we just ignore the repeat. */        we just ignore the repeat. */
# Line 4719  for (;; ptr++) Line 4905  for (;; ptr++)
4905              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
4906              for (i = 1; i < repeat_min; i++)              for (i = 1; i < repeat_min; i++)
4907                {                {
4908                uschar *hc;                pcre_uchar *hc;
4909                uschar *this_hwm = cd->hwm;                pcre_uchar *this_hwm = cd->hwm;
4910                memcpy(code, previous, len);                memcpy(code, previous, len);
4911                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
4912                  {                  {
# Line 4771  for (;; ptr++) Line 4957  for (;; ptr++)
4957    
4958          else for (i = repeat_max - 1; i >= 0; i--)          else for (i = repeat_max - 1; i >= 0; i--)
4959            {            {
4960            uschar *hc;            pcre_uchar *hc;
4961            uschar *this_hwm = cd->hwm;            pcre_uchar *this_hwm = cd->hwm;
4962    
4963            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
4964    
# Line 4805  for (;; ptr++) Line 4991  for (;; ptr++)
4991            {            {
4992            int oldlinkoffset;            int oldlinkoffset;
4993            int offset = (int)(code - bralink + 1);            int offset = (int)(code - bralink + 1);
4994            uschar *bra = code - offset;            pcre_uchar *bra = code - offset;
4995            oldlinkoffset = GET(bra, 1);            oldlinkoffset = GET(bra, 1);
4996            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
4997            *code++ = OP_KET;            *code++ = OP_KET;
# Line 4819  for (;; ptr++) Line 5005  for (;; ptr++)
5005        ONCE brackets can be converted into non-capturing brackets, as the        ONCE brackets can be converted into non-capturing brackets, as the
5006        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
5007        deal with possessive ONCEs specially.        deal with possessive ONCEs specially.
5008    
5009        Otherwise, if the quantifier was possessive, we convert the BRA code to        Otherwise, when we are doing the actual compile phase, check to see
5010        the POS form, and the KET code to KETRPOS. (It turns out to be convenient        whether this group is one that could match an empty string. If so,
5011        at runtime to detect this kind of subpattern at both the start and at the        convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
5012        end.) The use of special opcodes makes it possible to reduce greatly the        that runtime checking can be done. [This check is also applied to ONCE
5013        stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO,        groups at runtime, but in a different way.]
5014        convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that  
5015        the default action below, of wrapping everything inside atomic brackets,        Then, if the quantifier was possessive and the bracket is not a
5016        does not happen.        conditional, we convert the BRA code to the POS form, and the KET code to
5017          KETRPOS. (It turns out to be convenient at runtime to detect this kind of
5018        Then, when we are doing the actual compile phase, check to see whether        subpattern at both the start and at the end.) The use of special opcodes
5019        this group is one that could match an empty string. If so, convert the        makes it possible to reduce greatly the stack usage in pcre_exec(). If
5020        initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
5021        checking can be done. [This check is also applied to ONCE groups at  
5022        runtime, but in a different way.] */        Then, if the minimum number of matches is 1 or 0, cancel the possessive
5023          flag so that the default action below, of wrapping everything inside
5024          atomic brackets, does not happen. When the minimum is greater than 1,
5025          there will be earlier copies of the group, and so we still have to wrap
5026          the whole thing. */
5027    
5028        else        else
5029          {          {
5030          uschar *ketcode = code - 1 - LINK_SIZE;          pcre_uchar *ketcode = code - 1 - LINK_SIZE;
5031          uschar *bracode = ketcode - GET(ketcode, 1);          pcre_uchar *bracode = ketcode - GET(ketcode, 1);
5032    
5033          if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;          /* Convert possessive ONCE brackets to non-capturing */
5034          if (*bracode == OP_ONCE)  
5035            if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
5036                possessive_quantifier) *bracode = OP_BRA;
5037    
5038            /* For non-possessive ONCE brackets, all we need to do is to
5039            set the KET. */
5040    
5041            if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
5042            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
5043    
5044            /* Handle non-ONCE brackets and possessive ONCEs (which have been
5045            converted to non-capturing above). */
5046    
5047          else          else
5048            {            {
5049            if (possessive_quantifier)            /* In the compile phase, check for empty string matching. */
5050              {  
             *bracode += 1;                   /* Switch to xxxPOS opcodes */  
             *ketcode = OP_KETRPOS;  
             if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;  
             possessive_quantifier = FALSE;  
             }  
           else *ketcode = OP_KETRMAX + repeat_type;  
   
5051            if (lengthptr == NULL)            if (lengthptr == NULL)
5052              {              {
5053              uschar *scode = bracode;              pcre_uchar *scode = bracode;
5054              do              do
5055                {                {
5056                if (could_be_empty_branch(scode, ketcode, utf8, cd))                if (could_be_empty_branch(scode, ketcode, utf8, cd))
# Line 4868  for (;; ptr++) Line 5062  for (;; ptr++)
5062                }                }
5063              while (*scode == OP_ALT);              while (*scode == OP_ALT);
5064              }              }
5065    
5066              /* Handle possessive quantifiers. */
5067    
5068              if (possessive_quantifier)
5069                {
5070                /* For COND brackets, we wrap the whole thing in a possessively
5071                repeated non-capturing bracket, because we have not invented POS
5072                versions of the COND opcodes. Because we are moving code along, we
5073                must ensure that any pending recursive references are updated. */
5074    
5075                if (*bracode == OP_COND || *bracode == OP_SCOND)
5076                  {
5077                  int nlen = (int)(code - bracode);
5078                  *code = OP_END;
5079                  adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5080                  memmove(bracode + 1+LINK_SIZE, bracode, nlen);
5081                  code += 1 + LINK_SIZE;
5082                  nlen += 1 + LINK_SIZE;
5083                  *bracode = OP_BRAPOS;
5084                  *code++ = OP_KETRPOS;
5085                  PUTINC(code, 0, nlen);
5086                  PUT(bracode, 1, nlen);
5087                  }
5088    
5089                /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
5090    
5091                else
5092                  {
5093                  *bracode += 1;              /* Switch to xxxPOS opcodes */
5094                  *ketcode = OP_KETRPOS;
5095                  }
5096    
5097                /* If the minimum is zero, mark it as possessive, then unset the
5098                possessive flag when the minimum is 0 or 1. */
5099    
5100                if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
5101                if (repeat_min < 2) possessive_quantifier = FALSE;
5102                }
5103    
5104              /* Non-possessive quantifier */
5105    
5106              else *ketcode = OP_KETRMAX + repeat_type;
5107            }            }
5108          }          }
5109        }        }
# Line 4894  for (;; ptr++) Line 5130  for (;; ptr++)
5130      notation is just syntactic sugar, taken from Sun's Java package, but the      notation is just syntactic sugar, taken from Sun's Java package, but the
5131      special opcodes can optimize it.      special opcodes can optimize it.
5132    
5133      Possessively repeated subpatterns have already been handled in the code      Some (but not all) possessively repeated subpatterns have already been
5134      just above, so possessive_quantifier is always FALSE for them at this      completely handled in the code just above. For them, possessive_quantifier
5135      stage.      is always FALSE at this stage.
5136    
5137      Note that the repeated item starts at tempcode, not at previous, which      Note that the repeated item starts at tempcode, not at previous, which
5138      might be the first part of a string whose (former) last char we repeated.      might be the first part of a string whose (former) last char we repeated.
# Line 4997  for (;; ptr++) Line 5233  for (;; ptr++)
5233        int i, namelen;        int i, namelen;
5234        int arglen = 0;        int arglen = 0;
5235        const char *vn = verbnames;        const char *vn = verbnames;
5236        const uschar *name = ptr + 1;        const pcre_uchar *name = ptr + 1;
5237        const uschar *arg = NULL;        const pcre_uchar *arg = NULL;
5238        previous = NULL;        previous = NULL;
5239        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
5240        namelen = (int)(ptr - name);        namelen = (int)(ptr - name);
# Line 5096  for (;; ptr++) Line 5332  for (;; ptr++)
5332        {        {
5333        int i, set, unset, namelen;        int i, set, unset, namelen;
5334        int *optset;        int *optset;
5335        const uschar *name;        const pcre_uchar *name;
5336        uschar *slot;        pcre_uchar *slot;
5337    
5338        switch (*(++ptr))        switch (*(++ptr))
5339          {          {
# Line 5386  for (;; ptr++) Line 5622  for (;; ptr++)
5622    
5623          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5624          case CHAR_C:                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
5625          previous_callout = code;  /* Save for later completion */          previous_callout = code;     /* Save for later completion */
5626          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1;    /* Skip one item before completing */
5627          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
5628            {            {
5629            int n = 0;            int n = 0;
# Line 5523  for (;; ptr++) Line 5759  for (;; ptr++)
5759    
5760              if (!dupname)              if (!dupname)
5761                {                {
5762                uschar *cslot = cd->name_table;                pcre_uchar *cslot = cd->name_table;
5763                for (i = 0; i < cd->names_found; i++)                for (i = 0; i < cd->names_found; i++)
5764                  {                  {
5765                  if (cslot != slot)                  if (cslot != slot)
# Line 5579  for (;; ptr++) Line 5815  for (;; ptr++)
5815    
5816          if (lengthptr != NULL)          if (lengthptr != NULL)
5817            {            {
5818            const uschar *temp;            const pcre_uchar *temp;
5819    
5820            if (namelen == 0)            if (namelen == 0)
5821              {              {
# Line 5661  for (;; ptr++) Line 5897  for (;; ptr++)
5897          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
5898          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
5899            {            {
5900            const uschar *called;            const pcre_uchar *called;
5901            terminator = CHAR_RIGHT_PARENTHESIS;            terminator = CHAR_RIGHT_PARENTHESIS;
5902    
5903            /* Come here from the \g<...> and \g'...' code (Oniguruma            /* Come here from the \g<...> and \g'...' code (Oniguruma
# Line 5906  for (;; ptr++) Line 6142  for (;; ptr++)
6142      *code = bravalue;      *code = bravalue;
6143      tempcode = code;      tempcode = code;
6144      tempreqvary = cd->req_varyopt;        /* Save value before bracket */      tempreqvary = cd->req_varyopt;        /* Save value before bracket */
6145        tempbracount = cd->bracount;          /* Save value before bracket */
6146      length_prevgroup = 0;                 /* Initialize for pre-compile phase */      length_prevgroup = 0;                 /* Initialize for pre-compile phase */
6147    
6148      if (!compile_regex(      if (!compile_regex(
# Line 5928  for (;; ptr++) Line 6165  for (;; ptr++)
6165           ))           ))
6166        goto FAILED;        goto FAILED;
6167    
6168        /* If this was an atomic group and there are no capturing groups within it,
6169        generate OP_ONCE_NC instead of OP_ONCE. */
6170    
6171        if (bravalue == OP_ONCE && cd->bracount <= tempbracount)
6172          *code = OP_ONCE_NC;
6173    
6174      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
6175        cd->assert_depth -= 1;        cd->assert_depth -= 1;
6176    
6177      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
6178      group, while tempcode has been updated to point past the end of the group      group, while tempcode has been updated to point past the end of the group.
6179      and any option resetting that may follow it. The pattern pointer (ptr)      The pattern pointer (ptr) is on the bracket.
     is on the bracket. */  
6180    
6181      /* If this is a conditional bracket, check that there are no more than      If this is a conditional bracket, check that there are no more than
6182      two branches in the group, or just one if it's a DEFINE group. We do this      two branches in the group, or just one if it's a DEFINE group. We do this
6183      in the real compile phase, not in the pre-pass, where the whole group may      in the real compile phase, not in the pre-pass, where the whole group may
6184      not be available. */      not be available. */
6185    
6186      if (bravalue == OP_COND && lengthptr == NULL)      if (bravalue == OP_COND && lengthptr == NULL)
6187        {        {
6188        uschar *tc = code;        pcre_uchar *tc = code;
6189        int condcount = 0;        int condcount = 0;
6190    
6191        do {        do {
# Line 6118  for (;; ptr++) Line 6360  for (;; ptr++)
6360    
6361        if (-c == ESC_g)        if (-c == ESC_g)
6362          {          {
6363          const uschar *p;          const pcre_uchar *p;
6364          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
6365          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
6366            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
# Line 6258  for (;; ptr++) Line 6500  for (;; ptr++)
6500            }            }
6501          else          else
6502  #endif  #endif
6503            {          /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE
6504            so that it works in DFA mode and in lookbehinds. */
6505    
6506              {
6507            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
6508            *code++ = -c;            *code++ = (!utf8 && c == -ESC_C)? OP_ALLANY : -c;
6509            }            }
6510          }          }
6511        continue;        continue;
# Line 6394  Returns:         TRUE on success Line 6639  Returns:         TRUE on success
6639  */  */
6640    
6641  static BOOL  static BOOL
6642  compile_regex(int options, uschar **codeptr, const uschar **ptrptr,  compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
6643    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
6644    int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
6645    compile_data *cd, int *lengthptr)    compile_data *cd, int *lengthptr)
6646  {  {
6647  const uschar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
6648  uschar *code = *codeptr;  pcre_uchar *code = *codeptr;
6649  uschar *last_branch = code;  pcre_uchar *last_branch = code;
6650  uschar *start_bracket = code;  pcre_uchar *start_bracket = code;
6651  uschar *reverse_count = NULL;  pcre_uchar *reverse_count = NULL;
6652  open_capitem capitem;  open_capitem capitem;
6653  int capnumber = 0;  int capnumber = 0;
6654  int firstbyte, reqbyte;  int firstbyte, reqbyte;
# Line 6552  for (;;) Line 6797  for (;;)
6797          }          }
6798        else if (fixed_length < 0)        else if (fixed_length < 0)
6799          {          {
6800          *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;          *errorcodeptr = (fixed_length == -2)? ERR36 :
6801                            (fixed_length == -4)? ERR70: ERR25;
6802          *ptrptr = ptr;          *ptrptr = ptr;
6803          return FALSE;          return FALSE;
6804          }          }
# Line 6699  Returns:     TRUE or FALSE Line 6945  Returns:     TRUE or FALSE
6945  */  */
6946    
6947  static BOOL  static BOOL
6948  is_anchored(register const uschar *code, unsigned int bracket_map,  is_anchored(register const pcre_uchar *code, unsigned int bracket_map,
6949    unsigned int backref_map)    unsigned int backref_map)
6950  {  {
6951  do {  do {
6952     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
6953       FALSE);       code + _pcre_OP_lengths[*code], FALSE);
6954     register int op = *scode;     register int op = *scode;
6955    
6956     /* Non-capturing brackets */     /* Non-capturing brackets */
# Line 6727  do { Line 6973  do {
6973    
6974     /* Other brackets */     /* Other brackets */
6975    
6976     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC ||
6977                op == OP_COND)
6978       {       {
6979       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;
6980       }       }
# Line 6775  Returns:         TRUE or FALSE Line 7022  Returns:         TRUE or FALSE
7022  */  */
7023    
7024  static BOOL  static BOOL
7025  is_startline(const uschar *code, unsigned int bracket_map,  is_startline(const pcre_uchar *code, unsigned int bracket_map,
7026    unsigned int backref_map)    unsigned int backref_map)
7027  {  {
7028  do {  do {
7029     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const pcre_uchar *scode = first_significant_code(
7030       FALSE);       code + _pcre_OP_lengths[*code], FALSE);
7031     register int op = *scode;     register int op = *scode;
7032    
7033     /* If we are at the start of a conditional assertion group, *both* the     /* If we are at the start of a conditional assertion group, *both* the
# Line 6831  do { Line 7078  do {
7078    
7079     /* Other brackets */     /* Other brackets */
7080    
7081     else if (op == OP_ASSERT || op == OP_ONCE)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC)
7082       {       {
7083       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
7084       }       }
# Line 6878  Returns:     -1 or the fixed first char Line 7125  Returns:     -1 or the fixed first char
7125  */  */
7126    
7127  static int  static int
7128  find_firstassertedchar(const uschar *code, BOOL inassert)  find_firstassertedchar(const pcre_uchar *code, BOOL inassert)
7129  {  {
7130  register int c = -1;  register int c = -1;
7131  do {  do {
7132     int d;     int d;
7133     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||     int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
7134               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0;               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0;
7135     const uschar *scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);     const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
7136         TRUE);
7137     register int op = *scode;     register int op = *scode;
7138    
7139     switch(op)     switch(op)
# Line 6901  do { Line 7149  do {
7149       case OP_SCBRAPOS:       case OP_SCBRAPOS:
7150       case OP_ASSERT:       case OP_ASSERT:
7151       case OP_ONCE:       case OP_ONCE:
7152         case OP_ONCE_NC:
7153       case OP_COND:       case OP_COND:
7154       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)
7155         return -1;         return -1;
# Line 6983  int errorcode = 0; Line 7232  int errorcode = 0;
7232  int skipatstart = 0;  int skipatstart = 0;
7233  BOOL utf8;  BOOL utf8;
7234  size_t size;  size_t size;
7235  uschar *code;  pcre_uchar *code;
7236  const uschar *codestart;  const pcre_uchar *codestart;
7237  const uschar *ptr;  const pcre_uchar *ptr;
7238  compile_data compile_block;  compile_data compile_block;
7239  compile_data *cd = &compile_block;  compile_data *cd = &compile_block;
7240    
# Line 6995  as soon as possible, so that a fairly la Line 7244  as soon as possible, so that a fairly la
7244  this purpose. The same space is used in the second phase for remembering where  this purpose. The same space is used in the second phase for remembering where
7245  to fill in forward references to subpatterns. */  to fill in forward references to subpatterns. */
7246    
7247  uschar cworkspace[COMPILE_WORK_SIZE];  pcre_uchar cworkspace[COMPILE_WORK_SIZE];
7248    
7249  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
7250    
7251  ptr = (const uschar *)pattern;  ptr = (const pcre_uchar *)pattern;
7252    
7253  /* We can't pass back an error message if errorptr is NULL; I guess the best we  /* We can't pass back an error message if errorptr is NULL; I guess the best we
7254  can do is just return NULL, but we can set a code value if there is a code  can do is just return NULL, but we can set a code value if there is a code
# Line 7088  not used here. */ Line 7337  not used here. */
7337    
7338  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
7339  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7340       (errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0)       (errorcode = _pcre_valid_utf8((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
7341    {    {
7342    errorcode = ERR44;    errorcode = ERR44;
7343    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
# Line 7186  cd->name_table = NULL; Line 7435  cd->name_table = NULL;
7435  cd->start_workspace = cworkspace;  cd->start_workspace = cworkspace;
7436  cd->start_code = cworkspace;  cd->start_code = cworkspace;
7437  cd->hwm = cworkspace;  cd->hwm = cworkspace;
7438  cd->start_pattern = (const uschar *)pattern;  cd->start_pattern = (const pcre_uchar *)pattern;
7439  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + strlen(pattern));
7440  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7441  cd->external_options = options;  cd->external_options = options;
7442  cd->external_flags = 0;  cd->external_flags = 0;
# Line 7260  cd->final_bracount = cd->bracount;  /* S Line 7509  cd->final_bracount = cd->bracount;  /* S
7509  cd->assert_depth = 0;  cd->assert_depth = 0;
7510  cd->bracount = 0;  cd->bracount = 0;
7511  cd->names_found = 0;  cd->names_found = 0;
7512  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
7513  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
7514  cd->start_code = codestart;  cd->start_code = codestart;
7515  cd->hwm = cworkspace;  cd->hwm = cworkspace;
# Line 7273  cd->open_caps = NULL; Line 7522  cd->open_caps = NULL;
7522  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
7523  of the function here. */  of the function here. */
7524    
7525  ptr = (const uschar *)pattern + skipatstart;  ptr = (const pcre_uchar *)pattern + skipatstart;
7526  code = (uschar *)codestart;  code = (pcre_uchar *)codestart;
7527  *code = OP_BRA;  *code = OP_BRA;
7528  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
7529    &firstbyte, &reqbyte, NULL, cd, NULL);    &firstbyte, &reqbyte, NULL, cd, NULL);
# Line 7302  if (code - codestart > length) errorcode Line 7551  if (code - codestart > length) errorcode
7551  while (errorcode == 0 && cd->hwm > cworkspace)  while (errorcode == 0 && cd->hwm > cworkspace)
7552    {    {
7553    int offset, recno;    int offset, recno;
7554    const uschar *groupptr;    const pcre_uchar *groupptr;
7555    cd->hwm -= LINK_SIZE;    cd->hwm -= LINK_SIZE;
7556    offset = GET(cd->hwm, 0);    offset = GET(cd->hwm, 0);
7557    recno = GET(codestart, offset);    recno = GET(codestart, offset);
7558    groupptr = _pcre_find_bracket(codestart, utf8, recno);    groupptr = _pcre_find_bracket(codestart, utf8, recno);
7559    if (groupptr == NULL) errorcode = ERR53;    if (groupptr == NULL) errorcode = ERR53;
7560      else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));      else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));
7561    }    }
7562    
7563  /* Give an error if there's back reference to a non-existent capturing  /* Give an error if there's back reference to a non-existent capturing
# Line 7326  length, and set their lengths. */ Line 7575  length, and set their lengths. */
7575    
7576  if (cd->check_lookbehind)  if (cd->check_lookbehind)
7577    {    {
7578    uschar *cc = (uschar *)codestart;    pcre_uchar *cc = (pcre_uchar *)codestart;
7579    
7580    /* Loop, searching for OP_REVERSE items, and process those that do not have    /* Loop, searching for OP_REVERSE items, and process those that do not have
7581    their length set. (Actually, it will also re-process any that have a length    their length set. (Actually, it will also re-process any that have a length
7582    of zero, but that is a pathological case, and it does no harm.) When we find    of zero, but that is a pathological case, and it does no harm.) When we find
7583    one, we temporarily terminate the branch it is in while we scan it. */    one, we temporarily terminate the branch it is in while we scan it. */
7584    
7585    for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);    for (cc = (pcre_uchar *)_pcre_find_bracket(codestart, utf8, -1);
7586         cc != NULL;         cc != NULL;
7587         cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))         cc = (pcre_uchar *)_pcre_find_bracket(cc, utf8, -1))
7588      {      {
7589      if (GET(cc, 1) == 0)      if (GET(cc, 1) == 0)
7590        {        {
7591        int fixed_length;        int fixed_length;
7592        uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);        pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
7593        int end_op = *be;        int end_op = *be;
7594        *be = OP_END;        *be = OP_END;
7595        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
# Line 7349  if (cd->check_lookbehind) Line 7598  if (cd->check_lookbehind)
7598        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
7599        if (fixed_length < 0)        if (fixed_length < 0)
7600          {          {
7601          errorcode = (fixed_length == -2)? ERR36 : ERR25;          errorcode = (fixed_length == -2)? ERR36 :
7602                        (fixed_length == -4)? ERR70 : ERR25;
7603          break;          break;
7604          }          }
7605        PUT(cc, 1, fixed_length);        PUT(cc, 1, fixed_length);
# Line 7364  if (errorcode != 0) Line 7614  if (errorcode != 0)
7614    {    {
7615    (pcre_free)(re);    (pcre_free)(re);
7616    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
7617    *erroroffset = (int)(ptr - (const uschar *)pattern);    *erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
7618    PCRE_EARLY_ERROR_RETURN2:    PCRE_EARLY_ERROR_RETURN2:
7619    *errorptr = find_error_text(errorcode);    *errorptr = find_error_text(errorcode);
7620    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
# Line 7450  if (code - codestart > length) Line 7700  if (code - codestart > length)
7700    {    {
7701    (pcre_free)(re);    (pcre_free)(re);
7702    *errorptr = find_error_text(ERR23);    *errorptr = find_error_text(ERR23);
7703    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (pcre_uchar *)pattern;
7704    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
7705    return NULL;    return NULL;
7706    }    }

Legend:
Removed from v.716  
changed lines
  Added in v.756

  ViewVC Help
Powered by ViewVC 1.1.5