/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 389 by ph10, Sun Mar 15 18:24:05 2009 UTC | revision 438 by ph10, Sun Sep 6 20:00:47 2009 UTC
# Line 97  are simple data values; negative values Line 97  are simple data values; negative values
97  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
98  is invalid. */  is invalid. */
99    
100  #ifndef EBCDIC  /* This is the "normal" table for ASCII systems */  #ifndef EBCDIC
101    
102    /* This is the "normal" table for ASCII systems or for EBCDIC systems running
103    in UTF-8 mode. */
104    
105  static const short int escapes[] = {  static const short int escapes[] = {
106       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,                       0,
107       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,                       0,
108     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */       0,                       0,
109  -ESC_H,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */       0,                       0,
110  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0, -ESC_V, -ESC_W,   /* P - W */       0,                       0,
111  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */       CHAR_COLON,              CHAR_SEMICOLON,
112     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */       CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,
113  -ESC_h,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */       CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,
114  -ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0, -ESC_v, -ESC_w,   /* p - w */       CHAR_COMMERCIAL_AT,      -ESC_A,
115       0,      0, -ESC_z                                            /* x - z */       -ESC_B,                  -ESC_C,
116         -ESC_D,                  -ESC_E,
117         0,                       -ESC_G,
118         -ESC_H,                  0,
119         0,                       -ESC_K,
120         0,                       0,
121         0,                       0,
122         -ESC_P,                  -ESC_Q,
123         -ESC_R,                  -ESC_S,
124         0,                       0,
125         -ESC_V,                  -ESC_W,
126         -ESC_X,                  0,
127         -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
128         CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
129         CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
130         CHAR_GRAVE_ACCENT,       7,
131         -ESC_b,                  0,
132         -ESC_d,                  ESC_e,
133         ESC_f,                   0,
134         -ESC_h,                  0,
135         0,                       -ESC_k,
136         0,                       0,
137         ESC_n,                   0,
138         -ESC_p,                  0,
139         ESC_r,                   -ESC_s,
140         ESC_tee,                 0,
141         -ESC_v,                  -ESC_w,
142         0,                       0,
143         -ESC_z
144  };  };
145    
146  #else           /* This is the "abnormal" table for EBCDIC systems */  #else
147    
148    /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
149    
150  static const short int escapes[] = {  static const short int escapes[] = {
151  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
152  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
# Line 142  static const short int escapes[] = { Line 177  static const short int escapes[] = {
177    
178  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is
179  searched linearly. Put all the names into a single string, in order to reduce  searched linearly. Put all the names into a single string, in order to reduce
180  the number of relocations when a shared library is dynamically linked. */  the number of relocations when a shared library is dynamically linked. The
181    string is built from string macros so that it works in UTF-8 mode on EBCDIC
182    platforms. */
183    
184  typedef struct verbitem {  typedef struct verbitem {
185    int   len;    int   len;
# Line 150  typedef struct verbitem { Line 187  typedef struct verbitem {
187  } verbitem;  } verbitem;
188    
189  static const char verbnames[] =  static const char verbnames[] =
190    "ACCEPT\0"    STRING_ACCEPT0
191    "COMMIT\0"    STRING_COMMIT0
192    "F\0"    STRING_F0
193    "FAIL\0"    STRING_FAIL0
194    "PRUNE\0"    STRING_PRUNE0
195    "SKIP\0"    STRING_SKIP0
196    "THEN";    STRING_THEN;
197    
198  static const verbitem verbs[] = {  static const verbitem verbs[] = {
199    { 6, OP_ACCEPT },    { 6, OP_ACCEPT },
# Line 178  length entry. The first three must be al Line 215  length entry. The first three must be al
215  for handling case independence. */  for handling case independence. */
216    
217  static const char posix_names[] =  static const char posix_names[] =
218    "alpha\0"  "lower\0"  "upper\0"  "alnum\0"  "ascii\0"  "blank\0"    STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
219    "cntrl\0"  "digit\0"  "graph\0"  "print\0"  "punct\0"  "space\0"    STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
220    "word\0"   "xdigit";    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
221      STRING_word0  STRING_xdigit;
222    
223  static const uschar posix_name_lengths[] = {  static const uschar posix_name_lengths[] = {
224    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
# Line 322  For convenience, we use the same bit def Line 360  For convenience, we use the same bit def
360    
361  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
362    
363  #ifndef EBCDIC  /* This is the "normal" case, for ASCII systems */  #ifndef EBCDIC
364    
365    /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
366    UTF-8 mode. */
367    
368  static const unsigned char digitab[] =  static const unsigned char digitab[] =
369    {    {
370    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
# Line 358  static const unsigned char digitab[] = Line 400  static const unsigned char digitab[] =
400    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
401    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
402    
403  #else           /* This is the "abnormal" case, for EBCDIC systems */  #else
404    
405    /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
406    
407  static const unsigned char digitab[] =  static const unsigned char digitab[] =
408    {    {
409    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
# Line 503  if (c == 0) *errorcodeptr = ERR1; Line 548  if (c == 0) *errorcodeptr = ERR1;
548  in a table. A non-zero result is something that can be returned immediately.  in a table. A non-zero result is something that can be returned immediately.
549  Otherwise further processing may be required. */  Otherwise further processing may be required. */
550    
551  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
552  else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */  else if (c < CHAR_0 || c > CHAR_z) {}                     /* Not alphanumeric */
553  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - CHAR_0]) != 0) c = i;
554    
555  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
556  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
# Line 524  else Line 569  else
569      /* A number of Perl escapes are not handled by PCRE. We give an explicit      /* A number of Perl escapes are not handled by PCRE. We give an explicit
570      error. */      error. */
571    
572      case 'l':      case CHAR_l:
573      case 'L':      case CHAR_L:
574      case 'N':      case CHAR_N:
575      case 'u':      case CHAR_u:
576      case 'U':      case CHAR_U:
577      *errorcodeptr = ERR37;      *errorcodeptr = ERR37;
578      break;      break;
579    
# Line 548  else Line 593  else
593      (possibly recursive) subroutine calls, _not_ backreferences. Just return      (possibly recursive) subroutine calls, _not_ backreferences. Just return
594      the -ESC_g code (cf \k). */      the -ESC_g code (cf \k). */
595    
596      case 'g':      case CHAR_g:
597      if (ptr[1] == '<' || ptr[1] == '\'')      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
598        {        {
599        c = -ESC_g;        c = -ESC_g;
600        break;        break;
# Line 557  else Line 602  else
602    
603      /* Handle the Perl-compatible cases */      /* Handle the Perl-compatible cases */
604    
605      if (ptr[1] == '{')      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
606        {        {
607        const uschar *p;        const uschar *p;
608        for (p = ptr+2; *p != 0 && *p != '}'; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
609          if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
610        if (*p != 0 && *p != '}')        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
611          {          {
612          c = -ESC_k;          c = -ESC_k;
613          break;          break;
# Line 572  else Line 617  else
617        }        }
618      else braced = FALSE;      else braced = FALSE;
619    
620      if (ptr[1] == '-')      if (ptr[1] == CHAR_MINUS)
621        {        {
622        negated = TRUE;        negated = TRUE;
623        ptr++;        ptr++;
# Line 581  else Line 626  else
626    
627      c = 0;      c = 0;
628      while ((digitab[ptr[1]] & ctype_digit) != 0)      while ((digitab[ptr[1]] & ctype_digit) != 0)
629        c = c * 10 + *(++ptr) - '0';        c = c * 10 + *(++ptr) - CHAR_0;
630    
631      if (c < 0)   /* Integer overflow */      if (c < 0)   /* Integer overflow */
632        {        {
# Line 589  else Line 634  else
634        break;        break;
635        }        }
636    
637      if (braced && *(++ptr) != '}')      if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
638        {        {
639        *errorcodeptr = ERR57;        *errorcodeptr = ERR57;
640        break;        break;
# Line 626  else Line 671  else
671      value is greater than 377, the least significant 8 bits are taken. Inside a      value is greater than 377, the least significant 8 bits are taken. Inside a
672      character class, \ followed by a digit is always an octal number. */      character class, \ followed by a digit is always an octal number. */
673    
674      case '1': case '2': case '3': case '4': case '5':      case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
675      case '6': case '7': case '8': case '9':      case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
676    
677      if (!isclass)      if (!isclass)
678        {        {
679        oldptr = ptr;        oldptr = ptr;
680        c -= '0';        c -= CHAR_0;
681        while ((digitab[ptr[1]] & ctype_digit) != 0)        while ((digitab[ptr[1]] & ctype_digit) != 0)
682          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - CHAR_0;
683        if (c < 0)    /* Integer overflow */        if (c < 0)    /* Integer overflow */
684          {          {
685          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
# Line 652  else Line 697  else
697      generates a binary zero byte and treats the digit as a following literal.      generates a binary zero byte and treats the digit as a following literal.
698      Thus we have to pull back the pointer by one. */      Thus we have to pull back the pointer by one. */
699    
700      if ((c = *ptr) >= '8')      if ((c = *ptr) >= CHAR_8)
701        {        {
702        ptr--;        ptr--;
703        c = 0;        c = 0;
# Line 665  else Line 710  else
710      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
711      than 3 octal digits. */      than 3 octal digits. */
712    
713      case '0':      case CHAR_0:
714      c -= '0';      c -= CHAR_0;
715      while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')      while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
716          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - CHAR_0;
717      if (!utf8 && c > 255) *errorcodeptr = ERR51;      if (!utf8 && c > 255) *errorcodeptr = ERR51;
718      break;      break;
719    
# Line 676  else Line 721  else
721      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is
722      treated as a data character. */      treated as a data character. */
723    
724      case 'x':      case CHAR_x:
725      if (ptr[1] == '{')      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
726        {        {
727        const uschar *pt = ptr + 2;        const uschar *pt = ptr + 2;
728        int count = 0;        int count = 0;
# Line 686  else Line 731  else
731        while ((digitab[*pt] & ctype_xdigit) != 0)        while ((digitab[*pt] & ctype_xdigit) != 0)
732          {          {
733          register int cc = *pt++;          register int cc = *pt++;
734          if (c == 0 && cc == '0') continue;     /* Leading zeroes */          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
735          count++;          count++;
736    
737  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
738          if (cc >= 'a') cc -= 32;               /* Convert to upper case */          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
739          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
740  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
741          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
742          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
743  #endif  #endif
744          }          }
745    
746        if (*pt == '}')        if (*pt == CHAR_RIGHT_CURLY_BRACKET)
747          {          {
748          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
749          ptr = pt;          ptr = pt;
# Line 714  else Line 759  else
759      c = 0;      c = 0;
760      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
761        {        {
762        int cc;                               /* Some compilers don't like ++ */        int cc;                                  /* Some compilers don't like */
763        cc = *(++ptr);                        /* in initializers */        cc = *(++ptr);                           /* ++ in initializers */
764  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
765        if (cc >= 'a') cc -= 32;              /* Convert to upper case */        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
766        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
767  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
768        if (cc <= 'z') cc += 64;              /* Convert to upper case */        if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */
769        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
770  #endif  #endif
771        }        }
772      break;      break;
# Line 730  else Line 775  else
775      This coding is ASCII-specific, but then the whole concept of \cx is      This coding is ASCII-specific, but then the whole concept of \cx is
776      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
777    
778      case 'c':      case CHAR_c:
779      c = *(++ptr);      c = *(++ptr);
780      if (c == 0)      if (c == 0)
781        {        {
# Line 738  else Line 783  else
783        break;        break;
784        }        }
785    
786  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
787      if (c >= 'a' && c <= 'z') c -= 32;      if (c >= CHAR_a && c <= CHAR_z) c -= 32;
788      c ^= 0x40;      c ^= 0x40;
789  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
790      if (c >= 'a' && c <= 'z') c += 64;      if (c >= CHAR_a && c <= CHAR_z) c += 64;
791      c ^= 0xC0;      c ^= 0xC0;
792  #endif  #endif
793      break;      break;
# Line 804  if (c == 0) goto ERROR_RETURN; Line 849  if (c == 0) goto ERROR_RETURN;
849  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for
850  negation. */  negation. */
851    
852  if (c == '{')  if (c == CHAR_LEFT_CURLY_BRACKET)
853    {    {
854    if (ptr[1] == '^')    if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
855      {      {
856      *negptr = TRUE;      *negptr = TRUE;
857      ptr++;      ptr++;
# Line 815  if (c == '{') Line 860  if (c == '{')
860      {      {
861      c = *(++ptr);      c = *(++ptr);
862      if (c == 0) goto ERROR_RETURN;      if (c == 0) goto ERROR_RETURN;
863      if (c == '}') break;      if (c == CHAR_RIGHT_CURLY_BRACKET) break;
864      name[i] = c;      name[i] = c;
865      }      }
866    if (c !='}') goto ERROR_RETURN;    if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
867    name[i] = 0;    name[i] = 0;
868    }    }
869    
# Line 883  is_counted_repeat(const uschar *p) Line 928  is_counted_repeat(const uschar *p)
928  {  {
929  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
930  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
931  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
932    
933  if (*p++ != ',') return FALSE;  if (*p++ != CHAR_COMMA) return FALSE;
934  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
935    
936  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
937  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
938    
939  return (*p == '}');  return (*p == CHAR_RIGHT_CURLY_BRACKET);
940  }  }
941    
942    
# Line 924  int max = -1; Line 969  int max = -1;
969  /* Read the minimum value and do a paranoid check: a negative value indicates  /* Read the minimum value and do a paranoid check: a negative value indicates
970  an integer overflow. */  an integer overflow. */
971    
972  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;
973  if (min < 0 || min > 65535)  if (min < 0 || min > 65535)
974    {    {
975    *errorcodeptr = ERR5;    *errorcodeptr = ERR5;
# Line 934  if (min < 0 || min > 65535) Line 979  if (min < 0 || min > 65535)
979  /* Read the maximum value if there is one, and again do a paranoid check on its size.  /* Read the maximum value if there is one, and again do a paranoid check on its size.
980  Also, max must not be less than min. */  Also, max must not be less than min. */
981    
982  if (*p == '}') max = min; else  if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
983    {    {
984    if (*(++p) != '}')    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
985      {      {
986      max = 0;      max = 0;
987      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;
988      if (max < 0 || max > 65535)      if (max < 0 || max > 65535)
989        {        {
990        *errorcodeptr = ERR5;        *errorcodeptr = ERR5;
# Line 964  return p; Line 1009  return p;
1009    
1010    
1011  /*************************************************  /*************************************************
1012  *       Find forward referenced subpattern       *  *  Subroutine for finding forward reference      *
1013  *************************************************/  *************************************************/
1014    
1015  /* This function scans along a pattern's text looking for capturing  /* This recursive function is called only from find_parens() below. The
1016    top-level call starts at the beginning of the pattern. All other calls must
1017    start at a parenthesis. It scans along a pattern's text looking for capturing
1018  subpatterns, and counting them. If it finds a named pattern that matches the  subpatterns, and counting them. If it finds a named pattern that matches the
1019  name it is given, it returns its number. Alternatively, if the name is NULL, it  name it is given, it returns its number. Alternatively, if the name is NULL, it
1020  returns when it reaches a given numbered subpattern. This is used for forward  returns when it reaches a given numbered subpattern. We know that if (?P< is
1021  references to subpatterns. We know that if (?P< is encountered, the name will  encountered, the name will be terminated by '>' because that is checked in the
1022  be terminated by '>' because that is checked in the first pass.  first pass. Recursion is used to keep track of subpatterns that reset the
1023    capturing group numbers - the (?| feature.
1024    
1025  Arguments:  Arguments:
1026    ptr          current position in the pattern    ptrptr       address of the current character pointer (updated)
1027    cd           compile background data    cd           compile background data
1028    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1029    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1030    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1031      count        pointer to the current capturing subpattern number (updated)
1032    
1033  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
1034  */  */
1035    
1036  static int  static int
1037  find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
1038    BOOL xmode)    BOOL xmode, int *count)
1039  {  {
1040  const uschar *thisname;  uschar *ptr = *ptrptr;
1041  int count = cd->bracount;  int start_count = *count;
1042    int hwm_count = start_count;
1043    BOOL dup_parens = FALSE;
1044    
1045  for (; *ptr != 0; ptr++)  /* If the first character is a parenthesis, check on the type of group we are
1046    dealing with. The very first call may not start with a parenthesis. */
1047    
1048    if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1049    {    {
1050    int term;    if (ptr[1] == CHAR_QUESTION_MARK &&
1051          ptr[2] == CHAR_VERTICAL_LINE)
1052        {
1053        ptr += 3;
1054        dup_parens = TRUE;
1055        }
1056    
1057      /* Handle a normal, unnamed capturing parenthesis */
1058    
1059      else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
1060        {
1061        *count += 1;
1062        if (name == NULL && *count == lorn) return *count;
1063        ptr++;
1064        }
1065    
1066      /* Handle a condition. If it is an assertion, just carry on so that it
1067      is processed as normal. If not, skip to the closing parenthesis of the
1068      condition (there can't be any nested parens). */
1069    
1070      else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
1071        {
1072        ptr += 2;
1073        if (ptr[1] != CHAR_QUESTION_MARK)
1074          {
1075          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1076          if (*ptr != 0) ptr++;
1077          }
1078        }
1079    
1080      /* We have either (? or (* and not a condition */
1081    
1082      else
1083        {
1084        ptr += 2;
1085        if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */
1086    
1087        /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
1088    
1089        if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1090            ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1091          {
1092          int term;
1093          const uschar *thisname;
1094          *count += 1;
1095          if (name == NULL && *count == lorn) return *count;
1096          term = *ptr++;
1097          if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
1098          thisname = ptr;
1099          while (*ptr != term) ptr++;
1100          if (name != NULL && lorn == ptr - thisname &&
1101              strncmp((const char *)name, (const char *)thisname, lorn) == 0)
1102            return *count;
1103          term++;
1104          }
1105        }
1106      }
1107    
1108    /* Past any initial parenthesis handling, scan for parentheses or vertical
1109    bars. */
1110    
1111    for (; *ptr != 0; ptr++)
1112      {
1113    /* Skip over backslashed characters and also entire \Q...\E */    /* Skip over backslashed characters and also entire \Q...\E */
1114    
1115    if (*ptr == '\\')    if (*ptr == CHAR_BACKSLASH)
1116      {      {
1117      if (*(++ptr) == 0) return -1;      if (*(++ptr) == 0) goto FAIL_EXIT;
1118      if (*ptr == 'Q') for (;;)      if (*ptr == CHAR_Q) for (;;)
1119        {        {
1120        while (*(++ptr) != 0 && *ptr != '\\') {};        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1121        if (*ptr == 0) return -1;        if (*ptr == 0) goto FAIL_EXIT;
1122        if (*(++ptr) == 'E') break;        if (*(++ptr) == CHAR_E) break;
1123        }        }
1124      continue;      continue;
1125      }      }
# Line 1012  for (; *ptr != 0; ptr++) Line 1127  for (; *ptr != 0; ptr++)
1127    /* Skip over character classes; this logic must be similar to the way they    /* Skip over character classes; this logic must be similar to the way they
1128    are handled for real. If the first character is '^', skip it. Also, if the    are handled for real. If the first character is '^', skip it. Also, if the
1129    first few characters (either before or after ^) are \Q\E or \E we skip them    first few characters (either before or after ^) are \Q\E or \E we skip them
1130    too. This makes for compatibility with Perl. */    too. This makes for compatibility with Perl. Note the use of STR macros to
1131      encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */
1132    
1133    if (*ptr == '[')    if (*ptr == CHAR_LEFT_SQUARE_BRACKET)
1134      {      {
1135      BOOL negate_class = FALSE;      BOOL negate_class = FALSE;
1136      for (;;)      for (;;)
1137        {        {
1138        int c = *(++ptr);        if (ptr[1] == CHAR_BACKSLASH)
       if (c == '\\')  
1139          {          {
1140          if (ptr[1] == 'E') ptr++;          if (ptr[2] == CHAR_E)
1141            else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;            ptr+= 2;
1142              else break;          else if (strncmp((const char *)ptr+2,
1143                     STR_Q STR_BACKSLASH STR_E, 3) == 0)
1144              ptr += 4;
1145            else
1146              break;
1147          }          }
1148        else if (!negate_class && c == '^')        else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
1149            {
1150          negate_class = TRUE;          negate_class = TRUE;
1151            ptr++;
1152            }
1153        else break;        else break;
1154        }        }
1155    
1156      /* If the next character is ']', it is a data character that must be      /* If the next character is ']', it is a data character that must be
1157      skipped, except in JavaScript compatibility mode. */      skipped, except in JavaScript compatibility mode. */
1158    
1159      if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)      if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&
1160            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
1161        ptr++;        ptr++;
1162    
1163      while (*(++ptr) != ']')      while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
1164        {        {
1165        if (*ptr == 0) return -1;        if (*ptr == 0) return -1;
1166        if (*ptr == '\\')        if (*ptr == CHAR_BACKSLASH)
1167          {          {
1168          if (*(++ptr) == 0) return -1;          if (*(++ptr) == 0) goto FAIL_EXIT;
1169          if (*ptr == 'Q') for (;;)          if (*ptr == CHAR_Q) for (;;)
1170            {            {
1171            while (*(++ptr) != 0 && *ptr != '\\') {};            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1172            if (*ptr == 0) return -1;            if (*ptr == 0) goto FAIL_EXIT;
1173            if (*(++ptr) == 'E') break;            if (*(++ptr) == CHAR_E) break;
1174            }            }
1175          continue;          continue;
1176          }          }
# Line 1057  for (; *ptr != 0; ptr++) Line 1180  for (; *ptr != 0; ptr++)
1180    
1181    /* Skip comments in /x mode */    /* Skip comments in /x mode */
1182    
1183    if (xmode && *ptr == '#')    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1184      {      {
1185      while (*(++ptr) != 0 && *ptr != '\n') {};      while (*(++ptr) != 0 && *ptr != CHAR_NL) {};
1186      if (*ptr == 0) return -1;      if (*ptr == 0) goto FAIL_EXIT;
1187      continue;      continue;
1188      }      }
1189    
1190    /* An opening parens must now be a real metacharacter */    /* Check for the special metacharacters */
1191    
1192    if (*ptr != '(') continue;    if (*ptr == CHAR_LEFT_PARENTHESIS)
   if (ptr[1] != '?' && ptr[1] != '*')  
1193      {      {
1194      count++;      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
1195      if (name == NULL && count == lorn) return count;      if (rc > 0) return rc;
1196      continue;      if (*ptr == 0) goto FAIL_EXIT;
1197      }      }
1198    
1199    ptr += 2;    else if (*ptr == CHAR_RIGHT_PARENTHESIS)
1200    if (*ptr == 'P') ptr++;                      /* Allow optional P */      {
1201        if (dup_parens && *count < hwm_count) *count = hwm_count;
1202        *ptrptr = ptr;
1203        return -1;
1204        }
1205    
1206    /* We have to disambiguate (?<! and (?<= from (?<name> */    else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
1207        {
1208        if (*count > hwm_count) hwm_count = *count;
1209        *count = start_count;
1210        }
1211      }
1212    
1213    if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&  FAIL_EXIT:
1214         *ptr != '\'')  *ptrptr = ptr;
1215      continue;  return -1;
1216    }
1217    
1218    
1219    
1220    
1221    /*************************************************
1222    *       Find forward referenced subpattern       *
1223    *************************************************/
1224    
1225    /* This function scans along a pattern's text looking for capturing
1226    subpatterns, and counting them. If it finds a named pattern that matches the
1227    name it is given, it returns its number. Alternatively, if the name is NULL, it
1228    returns when it reaches a given numbered subpattern. This is used for forward
1229    references to subpatterns. We used to be able to start this scan from the
1230    current compiling point, using the current count value from cd->bracount, and
1231    do it all in a single loop, but the addition of the possibility of duplicate
1232    subpattern numbers means that we have to scan from the very start, in order to
1233    take account of such duplicates, and to use a recursive function to keep track
1234    of the different types of group.
1235    
1236    count++;  Arguments:
1237      cd           compile background data
1238      name         name to seek, or NULL if seeking a numbered subpattern
1239      lorn         name length, or subpattern number if name is NULL
1240      xmode        TRUE if we are in /x mode
1241    
1242    if (name == NULL && count == lorn) return count;  Returns:       the number of the found subpattern, or -1 if not found
1243    term = *ptr++;  */
1244    if (term == '<') term = '>';  
1245    thisname = ptr;  static int
1246    while (*ptr != term) ptr++;  find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)
1247    if (name != NULL && lorn == ptr - thisname &&  {
1248        strncmp((const char *)name, (const char *)thisname, lorn) == 0)  uschar *ptr = (uschar *)cd->start_pattern;
1249      return count;  int count = 0;
1250    int rc;
1251    
1252    /* If the pattern does not start with an opening parenthesis, the first call
1253    to find_parens_sub() will scan right to the end (if necessary). However, if it
1254    does start with a parenthesis, find_parens_sub() will return when it hits the
1255    matching closing parens. That is why we have to have a loop. */
1256    
1257    for (;;)
1258      {
1259      rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
1260      if (rc > 0 || *ptr++ == 0) break;
1261    }    }
1262    
1263  return -1;  return rc;
1264  }  }
1265    
1266    
1267    
1268    
1269  /*************************************************  /*************************************************
1270  *      Find first significant op code            *  *      Find first significant op code            *
1271  *************************************************/  *************************************************/
# Line 1260  for (;;) Line 1426  for (;;)
1426      branchlength++;      branchlength++;
1427      cc += 2;      cc += 2;
1428  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1429      if ((options & PCRE_UTF8) != 0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1430        {        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
       while ((*cc & 0xc0) == 0x80) cc++;  
       }  
1431  #endif  #endif
1432      break;      break;
1433    
# Line 1274  for (;;) Line 1438  for (;;)
1438      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1439      cc += 4;      cc += 4;
1440  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1441      if ((options & PCRE_UTF8) != 0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1442        {        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
       while((*cc & 0x80) == 0x80) cc++;  
       }  
1443  #endif  #endif
1444      break;      break;
1445    
# Line 1613  for (code = first_significant_code(code Line 1775  for (code = first_significant_code(code
1775      BOOL empty_branch;      BOOL empty_branch;
1776      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
1777    
1778      /* Scan a closed bracket */      /* If a conditional group has only one branch, there is a second, implied,
1779        empty branch, so just skip over the conditional, because it could be empty.
1780        Otherwise, scan the individual branches of the group. */
1781    
1782      empty_branch = FALSE;      if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
     do  
       {  
       if (!empty_branch && could_be_empty_branch(code, endcode, utf8))  
         empty_branch = TRUE;  
1783        code += GET(code, 1);        code += GET(code, 1);
1784        else
1785          {
1786          empty_branch = FALSE;
1787          do
1788            {
1789            if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
1790              empty_branch = TRUE;
1791            code += GET(code, 1);
1792            }
1793          while (*code == OP_ALT);
1794          if (!empty_branch) return FALSE;   /* All branches are non-empty */
1795        }        }
1796      while (*code == OP_ALT);  
     if (!empty_branch) return FALSE;   /* All branches are non-empty */  
1797      c = *code;      c = *code;
1798      continue;      continue;
1799      }      }
# Line 1741  for (code = first_significant_code(code Line 1911  for (code = first_significant_code(code
1911      case OP_QUERY:      case OP_QUERY:
1912      case OP_MINQUERY:      case OP_MINQUERY:
1913      case OP_POSQUERY:      case OP_POSQUERY:
1914        if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
1915        break;
1916    
1917      case OP_UPTO:      case OP_UPTO:
1918      case OP_MINUPTO:      case OP_MINUPTO:
1919      case OP_POSUPTO:      case OP_POSUPTO:
1920      if (utf8) while ((code[2] & 0xc0) == 0x80) code++;      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
1921      break;      break;
1922  #endif  #endif
1923      }      }
# Line 1825  int terminator;          /* Don't combin Line 1998  int terminator;          /* Don't combin
1998  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
1999  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != 0; ptr++)
2000    {    {
2001    if (*ptr == '\\' && ptr[1] == ']') ptr++; else    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else
2002      {      {
2003      if (*ptr == ']') return FALSE;      if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
2004      if (*ptr == terminator && ptr[1] == ']')      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2005        {        {
2006        *endptr = ptr;        *endptr = ptr;
2007        return TRUE;        return TRUE;
# Line 2074  if ((options & PCRE_EXTENDED) != 0) Line 2247  if ((options & PCRE_EXTENDED) != 0)
2247    for (;;)    for (;;)
2248      {      {
2249      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2250      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2251        {        {
2252        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2253          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 2086  if ((options & PCRE_EXTENDED) != 0) Line 2259  if ((options & PCRE_EXTENDED) != 0)
2259  /* If the next item is one that we can handle, get its value. A non-negative  /* If the next item is one that we can handle, get its value. A non-negative
2260  value is a character, a negative value is an escape value. */  value is a character, a negative value is an escape value. */
2261    
2262  if (*ptr == '\\')  if (*ptr == CHAR_BACKSLASH)
2263    {    {
2264    int temperrorcode = 0;    int temperrorcode = 0;
2265    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);
# Line 2111  if ((options & PCRE_EXTENDED) != 0) Line 2284  if ((options & PCRE_EXTENDED) != 0)
2284    for (;;)    for (;;)
2285      {      {
2286      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2287      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2288        {        {
2289        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2290          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 2122  if ((options & PCRE_EXTENDED) != 0) Line 2295  if ((options & PCRE_EXTENDED) != 0)
2295    
2296  /* If the next thing is itself optional, we have to give up. */  /* If the next thing is itself optional, we have to give up. */
2297    
2298  if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2299    return FALSE;    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2300        return FALSE;
2301    
2302  /* Now compare the next item with the previous opcode. If the previous is a  /* Now compare the next item with the previous opcode. If the previous is a
2303  positive single character match, "item" either contains the character or, if  positive single character match, "item" either contains the character or, if
# Line 2561  for (;; ptr++) Line 2735  for (;; ptr++)
2735    
2736    if (inescq && c != 0)    if (inescq && c != 0)
2737      {      {
2738      if (c == '\\' && ptr[1] == 'E')      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
2739        {        {
2740        inescq = FALSE;        inescq = FALSE;
2741        ptr++;        ptr++;
# Line 2587  for (;; ptr++) Line 2761  for (;; ptr++)
2761    /* Fill in length of a previous callout, except when the next thing is    /* Fill in length of a previous callout, except when the next thing is
2762    a quantifier. */    a quantifier. */
2763    
2764    is_quantifier = c == '*' || c == '+' || c == '?' ||    is_quantifier =
2765      (c == '{' && is_counted_repeat(ptr+1));      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
2766        (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
2767    
2768    if (!is_quantifier && previous_callout != NULL &&    if (!is_quantifier && previous_callout != NULL &&
2769         after_manual_callout-- <= 0)         after_manual_callout-- <= 0)
# Line 2603  for (;; ptr++) Line 2778  for (;; ptr++)
2778    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
2779      {      {
2780      if ((cd->ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
2781      if (c == '#')      if (c == CHAR_NUMBER_SIGN)
2782        {        {
2783        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2784          {          {
# Line 2628  for (;; ptr++) Line 2803  for (;; ptr++)
2803      {      {
2804      /* ===================================================================*/      /* ===================================================================*/
2805      case 0:                        /* The branch terminates at string end */      case 0:                        /* The branch terminates at string end */
2806      case '|':                      /* or | or ) */      case CHAR_VERTICAL_LINE:       /* or | or ) */
2807      case ')':      case CHAR_RIGHT_PARENTHESIS:
2808      *firstbyteptr = firstbyte;      *firstbyteptr = firstbyte;
2809      *reqbyteptr = reqbyte;      *reqbyteptr = reqbyte;
2810      *codeptr = code;      *codeptr = code;
# Line 2651  for (;; ptr++) Line 2826  for (;; ptr++)
2826      /* Handle single-character metacharacters. In multiline mode, ^ disables      /* Handle single-character metacharacters. In multiline mode, ^ disables
2827      the setting of any following char as a first character. */      the setting of any following char as a first character. */
2828    
2829      case '^':      case CHAR_CIRCUMFLEX_ACCENT:
2830      if ((options & PCRE_MULTILINE) != 0)      if ((options & PCRE_MULTILINE) != 0)
2831        {        {
2832        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
# Line 2660  for (;; ptr++) Line 2835  for (;; ptr++)
2835      *code++ = OP_CIRC;      *code++ = OP_CIRC;
2836      break;      break;
2837    
2838      case '$':      case CHAR_DOLLAR_SIGN:
2839      previous = NULL;      previous = NULL;
2840      *code++ = OP_DOLL;      *code++ = OP_DOLL;
2841      break;      break;
# Line 2668  for (;; ptr++) Line 2843  for (;; ptr++)
2843      /* There can never be a first char if '.' is first, whatever happens about      /* There can never be a first char if '.' is first, whatever happens about
2844      repeats. The value of reqbyte doesn't change either. */      repeats. The value of reqbyte doesn't change either. */
2845    
2846      case '.':      case CHAR_DOT:
2847      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
2848      zerofirstbyte = firstbyte;      zerofirstbyte = firstbyte;
2849      zeroreqbyte = reqbyte;      zeroreqbyte = reqbyte;
# Line 2692  for (;; ptr++) Line 2867  for (;; ptr++)
2867      In JavaScript compatibility mode, an isolated ']' causes an error. In      In JavaScript compatibility mode, an isolated ']' causes an error. In
2868      default (Perl) mode, it is treated as a data character. */      default (Perl) mode, it is treated as a data character. */
2869    
2870      case ']':      case CHAR_RIGHT_SQUARE_BRACKET:
2871      if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)      if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2872        {        {
2873        *errorcodeptr = ERR64;        *errorcodeptr = ERR64;
# Line 2700  for (;; ptr++) Line 2875  for (;; ptr++)
2875        }        }
2876      goto NORMAL_CHAR;      goto NORMAL_CHAR;
2877    
2878      case '[':      case CHAR_LEFT_SQUARE_BRACKET:
2879      previous = code;      previous = code;
2880    
2881      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
2882      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
2883    
2884      if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&      if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
2885             ptr[1] == CHAR_EQUALS_SIGN) &&
2886          check_posix_syntax(ptr, &tempptr))          check_posix_syntax(ptr, &tempptr))
2887        {        {
2888        *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;        *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
2889        goto FAILED;        goto FAILED;
2890        }        }
2891    
# Line 2721  for (;; ptr++) Line 2897  for (;; ptr++)
2897      for (;;)      for (;;)
2898        {        {
2899        c = *(++ptr);        c = *(++ptr);
2900        if (c == '\\')        if (c == CHAR_BACKSLASH)
2901          {          {
2902          if (ptr[1] == 'E') ptr++;          if (ptr[1] == CHAR_E)
2903            else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;            ptr++;
2904              else break;          else if (strncmp((const char *)ptr+1,
2905                              STR_Q STR_BACKSLASH STR_E, 3) == 0)
2906              ptr += 3;
2907            else
2908              break;
2909          }          }
2910        else if (!negate_class && c == '^')        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
2911          negate_class = TRUE;          negate_class = TRUE;
2912        else break;        else break;
2913        }        }
# Line 2737  for (;; ptr++) Line 2917  for (;; ptr++)
2917      that. In JS mode, [] must always fail, so generate OP_FAIL, whereas      that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
2918      [^] must match any character, so generate OP_ALLANY. */      [^] must match any character, so generate OP_ALLANY. */
2919    
2920      if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)      if (c == CHAR_RIGHT_SQUARE_BRACKET &&
2921            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2922        {        {
2923        *code++ = negate_class? OP_ALLANY : OP_FAIL;        *code++ = negate_class? OP_ALLANY : OP_FAIL;
2924        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
# Line 2802  for (;; ptr++) Line 2983  for (;; ptr++)
2983    
2984        if (inescq)        if (inescq)
2985          {          {
2986          if (c == '\\' && ptr[1] == 'E')     /* If we are at \E */          if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)  /* If we are at \E */
2987            {            {
2988            inescq = FALSE;                   /* Reset literal state */            inescq = FALSE;                   /* Reset literal state */
2989            ptr++;                            /* Skip the 'E' */            ptr++;                            /* Skip the 'E' */
# Line 2817  for (;; ptr++) Line 2998  for (;; ptr++)
2998        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
2999        5.6 and 5.8 do. */        5.6 and 5.8 do. */
3000    
3001        if (c == '[' &&        if (c == CHAR_LEFT_SQUARE_BRACKET &&
3002            (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&            (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
3003            check_posix_syntax(ptr, &tempptr))             ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
3004          {          {
3005          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3006          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3007          register const uschar *cbits = cd->cbits;          register const uschar *cbits = cd->cbits;
3008          uschar pbits[32];          uschar pbits[32];
3009    
3010          if (ptr[1] != ':')          if (ptr[1] != CHAR_COLON)
3011            {            {
3012            *errorcodeptr = ERR31;            *errorcodeptr = ERR31;
3013            goto FAILED;            goto FAILED;
3014            }            }
3015    
3016          ptr += 2;          ptr += 2;
3017          if (*ptr == '^')          if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
3018            {            {
3019            local_negate = TRUE;            local_negate = TRUE;
3020            should_flip_negation = TRUE;  /* Note negative special */            should_flip_negation = TRUE;  /* Note negative special */
# Line 2906  for (;; ptr++) Line 3087  for (;; ptr++)
3087        to 'or' into the one we are building. We assume they have more than one        to 'or' into the one we are building. We assume they have more than one
3088        character in them, so set class_charcount bigger than one. */        character in them, so set class_charcount bigger than one. */
3089    
3090        if (c == '\\')        if (c == CHAR_BACKSLASH)
3091          {          {
3092          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3093          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
3094    
3095          if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */          if (-c == ESC_b) c = CHAR_BS;       /* \b is backspace in a class */
3096          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */          else if (-c == ESC_X) c = CHAR_X;   /* \X is literal X in a class */
3097          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */          else if (-c == ESC_R) c = CHAR_R;   /* \R is literal R in a class */
3098          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
3099            {            {
3100            if (ptr[1] == '\\' && ptr[2] == 'E')            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3101              {              {
3102              ptr += 2; /* avoid empty string */              ptr += 2; /* avoid empty string */
3103              }              }
# Line 3142  for (;; ptr++) Line 3323  for (;; ptr++)
3323        entirely. The code for handling \Q and \E is messy. */        entirely. The code for handling \Q and \E is messy. */
3324    
3325        CHECK_RANGE:        CHECK_RANGE:
3326        while (ptr[1] == '\\' && ptr[2] == 'E')        while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3327          {          {
3328          inescq = FALSE;          inescq = FALSE;
3329          ptr += 2;          ptr += 2;
# Line 3152  for (;; ptr++) Line 3333  for (;; ptr++)
3333    
3334        /* Remember \r or \n */        /* Remember \r or \n */
3335    
3336        if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF;        if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3337    
3338        /* Check for range */        /* Check for range */
3339    
3340        if (!inescq && ptr[1] == '-')        if (!inescq && ptr[1] == CHAR_MINUS)
3341          {          {
3342          int d;          int d;
3343          ptr += 2;          ptr += 2;
3344          while (*ptr == '\\' && ptr[1] == 'E') ptr += 2;          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
3345    
3346          /* If we hit \Q (not followed by \E) at this point, go into escaped          /* If we hit \Q (not followed by \E) at this point, go into escaped
3347          mode. */          mode. */
3348    
3349          while (*ptr == '\\' && ptr[1] == 'Q')          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
3350            {            {
3351            ptr += 2;            ptr += 2;
3352            if (*ptr == '\\' && ptr[1] == 'E') { ptr += 2; continue; }            if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
3353                { ptr += 2; continue; }
3354            inescq = TRUE;            inescq = TRUE;
3355            break;            break;
3356            }            }
3357    
3358          if (*ptr == 0 || (!inescq && *ptr == ']'))          if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
3359            {            {
3360            ptr = oldptr;            ptr = oldptr;
3361            goto LONE_SINGLE_CHARACTER;            goto LONE_SINGLE_CHARACTER;
# Line 3192  for (;; ptr++) Line 3374  for (;; ptr++)
3374          not any of the other escapes. Perl 5.6 treats a hyphen as a literal          not any of the other escapes. Perl 5.6 treats a hyphen as a literal
3375          in such circumstances. */          in such circumstances. */
3376    
3377          if (!inescq && d == '\\')          if (!inescq && d == CHAR_BACKSLASH)
3378            {            {
3379            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3380            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
# Line 3202  for (;; ptr++) Line 3384  for (;; ptr++)
3384    
3385            if (d < 0)            if (d < 0)
3386              {              {
3387              if (d == -ESC_b) d = '\b';              if (d == -ESC_b) d = CHAR_BS;
3388              else if (d == -ESC_X) d = 'X';              else if (d == -ESC_X) d = CHAR_X;
3389              else if (d == -ESC_R) d = 'R'; else              else if (d == -ESC_R) d = CHAR_R; else
3390                {                {
3391                ptr = oldptr;                ptr = oldptr;
3392                goto LONE_SINGLE_CHARACTER;  /* A few lines below */                goto LONE_SINGLE_CHARACTER;  /* A few lines below */
# Line 3225  for (;; ptr++) Line 3407  for (;; ptr++)
3407    
3408          /* Remember \r or \n */          /* Remember \r or \n */
3409    
3410          if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF;          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3411    
3412          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
3413          matching, we have to use an XCLASS with extra data items. Caseless          matching, we have to use an XCLASS with extra data items. Caseless
# Line 3372  for (;; ptr++) Line 3554  for (;; ptr++)
3554    
3555      /* Loop until ']' reached. This "while" is the end of the "do" above. */      /* Loop until ']' reached. This "while" is the end of the "do" above. */
3556    
3557      while ((c = *(++ptr)) != 0 && (c != ']' || inescq));      while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
3558    
3559      if (c == 0)                          /* Missing terminating ']' */      if (c == 0)                          /* Missing terminating ']' */
3560        {        {
# Line 3517  we set the flag only if there is a liter Line 3699  we set the flag only if there is a liter
3699      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
3700      has been tested above. */      has been tested above. */
3701    
3702      case '{':      case CHAR_LEFT_CURLY_BRACKET:
3703      if (!is_quantifier) goto NORMAL_CHAR;      if (!is_quantifier) goto NORMAL_CHAR;
3704      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
3705      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
3706      goto REPEAT;      goto REPEAT;
3707    
3708      case '*':      case CHAR_ASTERISK:
3709      repeat_min = 0;      repeat_min = 0;
3710      repeat_max = -1;      repeat_max = -1;
3711      goto REPEAT;      goto REPEAT;
3712    
3713      case '+':      case CHAR_PLUS:
3714      repeat_min = 1;      repeat_min = 1;
3715      repeat_max = -1;      repeat_max = -1;
3716      goto REPEAT;      goto REPEAT;
3717    
3718      case '?':      case CHAR_QUESTION_MARK:
3719      repeat_min = 0;      repeat_min = 0;
3720      repeat_max = 1;      repeat_max = 1;
3721    
# Line 3568  we set the flag only if there is a liter Line 3750  we set the flag only if there is a liter
3750      but if PCRE_UNGREEDY is set, it works the other way round. We change the      but if PCRE_UNGREEDY is set, it works the other way round. We change the
3751      repeat type to the non-default. */      repeat type to the non-default. */
3752    
3753      if (ptr[1] == '+')      if (ptr[1] == CHAR_PLUS)
3754        {        {
3755        repeat_type = 0;                  /* Force greedy */        repeat_type = 0;                  /* Force greedy */
3756        possessive_quantifier = TRUE;        possessive_quantifier = TRUE;
3757        ptr++;        ptr++;
3758        }        }
3759      else if (ptr[1] == '?')      else if (ptr[1] == CHAR_QUESTION_MARK)
3760        {        {
3761        repeat_type = greedy_non_default;        repeat_type = greedy_non_default;
3762        ptr++;        ptr++;
# Line 3689  we set the flag only if there is a liter Line 3871  we set the flag only if there is a liter
3871    
3872        if (repeat_max == 0) goto END_REPEAT;        if (repeat_max == 0) goto END_REPEAT;
3873    
3874          /*--------------------------------------------------------------------*/
3875          /* This code is obsolete from release 8.00; the restriction was finally
3876          removed: */
3877    
3878        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
3879        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
3880    
3881        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
3882          /*--------------------------------------------------------------------*/
3883    
3884        /* Combine the op_type with the repeat_type */        /* Combine the op_type with the repeat_type */
3885    
# Line 3839  we set the flag only if there is a liter Line 4026  we set the flag only if there is a liter
4026          goto END_REPEAT;          goto END_REPEAT;
4027          }          }
4028    
4029          /*--------------------------------------------------------------------*/
4030          /* This code is obsolete from release 8.00; the restriction was finally
4031          removed: */
4032    
4033        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
4034        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
4035    
4036        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
4037          /*--------------------------------------------------------------------*/
4038    
4039        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
4040          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 4157  we set the flag only if there is a liter Line 4349  we set the flag only if there is a liter
4349      if (possessive_quantifier)      if (possessive_quantifier)
4350        {        {
4351        int len;        int len;
4352        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||  
4353            *tempcode == OP_NOTEXACT)        if (*tempcode == OP_TYPEEXACT)
4354          tempcode += _pcre_OP_lengths[*tempcode] +          tempcode += _pcre_OP_lengths[*tempcode] +
4355            ((*tempcode == OP_TYPEEXACT &&            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
4356               (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);  
4357          else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
4358            {
4359            tempcode += _pcre_OP_lengths[*tempcode];
4360    #ifdef SUPPORT_UTF8
4361            if (utf8 && tempcode[-1] >= 0xc0)
4362              tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
4363    #endif
4364            }
4365    
4366        len = code - tempcode;        len = code - tempcode;
4367        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4368          {          {
# Line 4207  we set the flag only if there is a liter Line 4408  we set the flag only if there is a liter
4408      lookbehind or option setting or condition or all the other extended      lookbehind or option setting or condition or all the other extended
4409      parenthesis forms.  */      parenthesis forms.  */
4410    
4411      case '(':      case CHAR_LEFT_PARENTHESIS:
4412      newoptions = options;      newoptions = options;
4413      skipbytes = 0;      skipbytes = 0;
4414      bravalue = OP_CBRA;      bravalue = OP_CBRA;
# Line 4216  we set the flag only if there is a liter Line 4417  we set the flag only if there is a liter
4417    
4418      /* First deal with various "verbs" that can be introduced by '*'. */      /* First deal with various "verbs" that can be introduced by '*'. */
4419    
4420      if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)      if (*(++ptr) == CHAR_ASTERISK && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
4421        {        {
4422        int i, namelen;        int i, namelen;
4423        const char *vn = verbnames;        const char *vn = verbnames;
4424        const uschar *name = ++ptr;        const uschar *name = ++ptr;
4425        previous = NULL;        previous = NULL;
4426        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
4427        if (*ptr == ':')        if (*ptr == CHAR_COLON)
4428          {          {
4429          *errorcodeptr = ERR59;   /* Not supported */          *errorcodeptr = ERR59;   /* Not supported */
4430          goto FAILED;          goto FAILED;
4431          }          }
4432        if (*ptr != ')')        if (*ptr != CHAR_RIGHT_PARENTHESIS)
4433          {          {
4434          *errorcodeptr = ERR60;          *errorcodeptr = ERR60;
4435          goto FAILED;          goto FAILED;
# Line 4253  we set the flag only if there is a liter Line 4454  we set the flag only if there is a liter
4454      /* Deal with the extended parentheses; all are introduced by '?', and the      /* Deal with the extended parentheses; all are introduced by '?', and the
4455      appearance of any of them means that this is not a capturing group. */      appearance of any of them means that this is not a capturing group. */
4456    
4457      else if (*ptr == '?')      else if (*ptr == CHAR_QUESTION_MARK)
4458        {        {
4459        int i, set, unset, namelen;        int i, set, unset, namelen;
4460        int *optset;        int *optset;
# Line 4262  we set the flag only if there is a liter Line 4463  we set the flag only if there is a liter
4463    
4464        switch (*(++ptr))        switch (*(++ptr))
4465          {          {
4466          case '#':                 /* Comment; skip to ket */          case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */
4467          ptr++;          ptr++;
4468          while (*ptr != 0 && *ptr != ')') ptr++;          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
4469          if (*ptr == 0)          if (*ptr == 0)
4470            {            {
4471            *errorcodeptr = ERR18;            *errorcodeptr = ERR18;
# Line 4274  we set the flag only if there is a liter Line 4475  we set the flag only if there is a liter
4475    
4476    
4477          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4478          case '|':                 /* Reset capture count for each branch */          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
4479          reset_bracount = TRUE;          reset_bracount = TRUE;
4480          /* Fall through */          /* Fall through */
4481    
4482          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4483          case ':':                 /* Non-capturing bracket */          case CHAR_COLON:          /* Non-capturing bracket */
4484          bravalue = OP_BRA;          bravalue = OP_BRA;
4485          ptr++;          ptr++;
4486          break;          break;
4487    
4488    
4489          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4490          case '(':          case CHAR_LEFT_PARENTHESIS:
4491          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
4492    
4493          /* A condition can be an assertion, a number (referring to a numbered          /* A condition can be an assertion, a number (referring to a numbered
# Line 4306  we set the flag only if there is a liter Line 4507  we set the flag only if there is a liter
4507          the switch. This will take control down to where bracketed groups,          the switch. This will take control down to where bracketed groups,
4508          including assertions, are processed. */          including assertions, are processed. */
4509    
4510          if (ptr[1] == '?' && (ptr[2] == '=' || ptr[2] == '!' || ptr[2] == '<'))          if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
4511                ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
4512            break;            break;
4513    
4514          /* Most other conditions use OP_CREF (a couple change to OP_RREF          /* Most other conditions use OP_CREF (a couple change to OP_RREF
# Line 4318  we set the flag only if there is a liter Line 4520  we set the flag only if there is a liter
4520    
4521          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
4522    
4523          if (ptr[1] == 'R' && ptr[2] == '&')          if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND)
4524            {            {
4525            terminator = -1;            terminator = -1;
4526            ptr += 2;            ptr += 2;
# Line 4328  we set the flag only if there is a liter Line 4530  we set the flag only if there is a liter
4530          /* Check for a test for a named group's having been set, using the Perl          /* Check for a test for a named group's having been set, using the Perl
4531          syntax (?(<name>) or (?('name') */          syntax (?(<name>) or (?('name') */
4532    
4533          else if (ptr[1] == '<')          else if (ptr[1] == CHAR_LESS_THAN_SIGN)
4534            {            {
4535            terminator = '>';            terminator = CHAR_GREATER_THAN_SIGN;
4536            ptr++;            ptr++;
4537            }            }
4538          else if (ptr[1] == '\'')          else if (ptr[1] == CHAR_APOSTROPHE)
4539            {            {
4540            terminator = '\'';            terminator = CHAR_APOSTROPHE;
4541            ptr++;            ptr++;
4542            }            }
4543          else          else
4544            {            {
4545            terminator = 0;            terminator = 0;
4546            if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
4547            }            }
4548    
4549          /* We now expect to read a name; any thing else is an error */          /* We now expect to read a name; any thing else is an error */
# Line 4361  we set the flag only if there is a liter Line 4563  we set the flag only if there is a liter
4563            {            {
4564            if (recno >= 0)            if (recno >= 0)
4565              recno = ((digitab[*ptr] & ctype_digit) != 0)?              recno = ((digitab[*ptr] & ctype_digit) != 0)?
4566                recno * 10 + *ptr - '0' : -1;                recno * 10 + *ptr - CHAR_0 : -1;
4567            ptr++;            ptr++;
4568            }            }
4569          namelen = ptr - name;          namelen = ptr - name;
4570    
4571          if ((terminator > 0 && *ptr++ != terminator) || *ptr++ != ')')          if ((terminator > 0 && *ptr++ != terminator) ||
4572                *ptr++ != CHAR_RIGHT_PARENTHESIS)
4573            {            {
4574            ptr--;      /* Error offset */            ptr--;      /* Error offset */
4575            *errorcodeptr = ERR26;            *errorcodeptr = ERR26;
# Line 4388  we set the flag only if there is a liter Line 4591  we set the flag only if there is a liter
4591              *errorcodeptr = ERR58;              *errorcodeptr = ERR58;
4592              goto FAILED;              goto FAILED;
4593              }              }
4594            recno = (refsign == '-')?            recno = (refsign == CHAR_MINUS)?
4595              cd->bracount - recno + 1 : recno +cd->bracount;              cd->bracount - recno + 1 : recno +cd->bracount;
4596            if (recno <= 0 || recno > cd->final_bracount)            if (recno <= 0 || recno > cd->final_bracount)
4597              {              {
# Line 4419  we set the flag only if there is a liter Line 4622  we set the flag only if there is a liter
4622    
4623          /* Search the pattern for a forward reference */          /* Search the pattern for a forward reference */
4624    
4625          else if ((i = find_parens(ptr, cd, name, namelen,          else if ((i = find_parens(cd, name, namelen,
4626                          (options & PCRE_EXTENDED) != 0)) > 0)                          (options & PCRE_EXTENDED) != 0)) > 0)
4627            {            {
4628            PUT2(code, 2+LINK_SIZE, i);            PUT2(code, 2+LINK_SIZE, i);
# Line 4440  we set the flag only if there is a liter Line 4643  we set the flag only if there is a liter
4643          /* Check for (?(R) for recursion. Allow digits after R to specify a          /* Check for (?(R) for recursion. Allow digits after R to specify a
4644          specific group number. */          specific group number. */
4645    
4646          else if (*name == 'R')          else if (*name == CHAR_R)
4647            {            {
4648            recno = 0;            recno = 0;
4649            for (i = 1; i < namelen; i++)            for (i = 1; i < namelen; i++)
# Line 4450  we set the flag only if there is a liter Line 4653  we set the flag only if there is a liter
4653                *errorcodeptr = ERR15;                *errorcodeptr = ERR15;
4654                goto FAILED;                goto FAILED;
4655                }                }
4656              recno = recno * 10 + name[i] - '0';              recno = recno * 10 + name[i] - CHAR_0;
4657              }              }
4658            if (recno == 0) recno = RREF_ANY;            if (recno == 0) recno = RREF_ANY;
4659            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */
# Line 4460  we set the flag only if there is a liter Line 4663  we set the flag only if there is a liter
4663          /* Similarly, check for the (?(DEFINE) "condition", which is always          /* Similarly, check for the (?(DEFINE) "condition", which is always
4664          false. */          false. */
4665    
4666          else if (namelen == 6 && strncmp((char *)name, "DEFINE", 6) == 0)          else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)
4667            {            {
4668            code[1+LINK_SIZE] = OP_DEF;            code[1+LINK_SIZE] = OP_DEF;
4669            skipbytes = 1;            skipbytes = 1;
# Line 4485  we set the flag only if there is a liter Line 4688  we set the flag only if there is a liter
4688    
4689    
4690          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4691          case '=':                 /* Positive lookahead */          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
4692          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
4693          ptr++;          ptr++;
4694          break;          break;
4695    
4696    
4697          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4698          case '!':                 /* Negative lookahead */          case CHAR_EXCLAMATION_MARK:            /* Negative lookahead */
4699          ptr++;          ptr++;
4700          if (*ptr == ')')          /* Optimize (?!) */          if (*ptr == CHAR_RIGHT_PARENTHESIS)    /* Optimize (?!) */
4701            {            {
4702            *code++ = OP_FAIL;            *code++ = OP_FAIL;
4703            previous = NULL;            previous = NULL;
# Line 4505  we set the flag only if there is a liter Line 4708  we set the flag only if there is a liter
4708    
4709    
4710          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4711          case '<':                 /* Lookbehind or named define */          case CHAR_LESS_THAN_SIGN:              /* Lookbehind or named define */
4712          switch (ptr[1])          switch (ptr[1])
4713            {            {
4714            case '=':               /* Positive lookbehind */            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
4715            bravalue = OP_ASSERTBACK;            bravalue = OP_ASSERTBACK;
4716            ptr += 2;            ptr += 2;
4717            break;            break;
4718    
4719            case '!':               /* Negative lookbehind */            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
4720            bravalue = OP_ASSERTBACK_NOT;            bravalue = OP_ASSERTBACK_NOT;
4721            ptr += 2;            ptr += 2;
4722            break;            break;
# Line 4528  we set the flag only if there is a liter Line 4731  we set the flag only if there is a liter
4731    
4732    
4733          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4734          case '>':                 /* One-time brackets */          case CHAR_GREATER_THAN_SIGN:           /* One-time brackets */
4735          bravalue = OP_ONCE;          bravalue = OP_ONCE;
4736          ptr++;          ptr++;
4737          break;          break;
4738    
4739    
4740          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4741          case 'C':                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
4742          previous_callout = code;  /* Save for later completion */          previous_callout = code;  /* Save for later completion */
4743          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1; /* Skip one item before completing */
4744          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
4745            {            {
4746            int n = 0;            int n = 0;
4747            while ((digitab[*(++ptr)] & ctype_digit) != 0)            while ((digitab[*(++ptr)] & ctype_digit) != 0)
4748              n = n * 10 + *ptr - '0';              n = n * 10 + *ptr - CHAR_0;
4749            if (*ptr != ')')            if (*ptr != CHAR_RIGHT_PARENTHESIS)
4750              {              {
4751              *errorcodeptr = ERR39;              *errorcodeptr = ERR39;
4752              goto FAILED;              goto FAILED;
# Line 4563  we set the flag only if there is a liter Line 4766  we set the flag only if there is a liter
4766    
4767    
4768          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4769          case 'P':                 /* Python-style named subpattern handling */          case CHAR_P:              /* Python-style named subpattern handling */
4770          if (*(++ptr) == '=' || *ptr == '>')  /* Reference or recursion */          if (*(++ptr) == CHAR_EQUALS_SIGN ||
4771                *ptr == CHAR_GREATER_THAN_SIGN)  /* Reference or recursion */
4772            {            {
4773            is_recurse = *ptr == '>';            is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
4774            terminator = ')';            terminator = CHAR_RIGHT_PARENTHESIS;
4775            goto NAMED_REF_OR_RECURSE;            goto NAMED_REF_OR_RECURSE;
4776            }            }
4777          else if (*ptr != '<')    /* Test for Python-style definition */          else if (*ptr != CHAR_LESS_THAN_SIGN)  /* Test for Python-style defn */
4778            {            {
4779            *errorcodeptr = ERR41;            *errorcodeptr = ERR41;
4780            goto FAILED;            goto FAILED;
# Line 4580  we set the flag only if there is a liter Line 4784  we set the flag only if there is a liter
4784    
4785          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4786          DEFINE_NAME:    /* Come here from (?< handling */          DEFINE_NAME:    /* Come here from (?< handling */
4787          case '\'':          case CHAR_APOSTROPHE:
4788            {            {
4789            terminator = (*ptr == '<')? '>' : '\'';            terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
4790                CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
4791            name = ++ptr;            name = ++ptr;
4792    
4793            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
# Line 4656  we set the flag only if there is a liter Line 4861  we set the flag only if there is a liter
4861    
4862    
4863          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4864          case '&':                 /* Perl recursion/subroutine syntax */          case CHAR_AMPERSAND:            /* Perl recursion/subroutine syntax */
4865          terminator = ')';          terminator = CHAR_RIGHT_PARENTHESIS;
4866          is_recurse = TRUE;          is_recurse = TRUE;
4867          /* Fall through */          /* Fall through */
4868    
# Line 4716  we set the flag only if there is a liter Line 4921  we set the flag only if there is a liter
4921              recno = GET2(slot, 0);              recno = GET2(slot, 0);
4922              }              }
4923            else if ((recno =                /* Forward back reference */            else if ((recno =                /* Forward back reference */
4924                      find_parens(ptr, cd, name, namelen,                      find_parens(cd, name, namelen,
4925                        (options & PCRE_EXTENDED) != 0)) <= 0)                        (options & PCRE_EXTENDED) != 0)) <= 0)
4926              {              {
4927              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
# Line 4732  we set the flag only if there is a liter Line 4937  we set the flag only if there is a liter
4937    
4938    
4939          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4940          case 'R':                 /* Recursion */          case CHAR_R:              /* Recursion */
4941          ptr++;                    /* Same as (?0)      */          ptr++;                    /* Same as (?0)      */
4942          /* Fall through */          /* Fall through */
4943    
4944    
4945          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4946          case '-': case '+':          case CHAR_MINUS: case CHAR_PLUS:  /* Recursion or subroutine */
4947          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
4948          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
4949            {            {
4950            const uschar *called;            const uschar *called;
4951            terminator = ')';            terminator = CHAR_RIGHT_PARENTHESIS;
4952    
4953            /* Come here from the \g<...> and \g'...' code (Oniguruma            /* Come here from the \g<...> and \g'...' code (Oniguruma
4954            compatibility). However, the syntax has been checked to ensure that            compatibility). However, the syntax has been checked to ensure that
# Line 4753  we set the flag only if there is a liter Line 4958  we set the flag only if there is a liter
4958    
4959            HANDLE_NUMERICAL_RECURSION:            HANDLE_NUMERICAL_RECURSION:
4960    
4961            if ((refsign = *ptr) == '+')            if ((refsign = *ptr) == CHAR_PLUS)
4962              {              {
4963              ptr++;              ptr++;
4964              if ((digitab[*ptr] & ctype_digit) == 0)              if ((digitab[*ptr] & ctype_digit) == 0)
# Line 4762  we set the flag only if there is a liter Line 4967  we set the flag only if there is a liter
4967                goto FAILED;                goto FAILED;
4968                }                }
4969              }              }
4970            else if (refsign == '-')            else if (refsign == CHAR_MINUS)
4971              {              {
4972              if ((digitab[ptr[1]] & ctype_digit) == 0)              if ((digitab[ptr[1]] & ctype_digit) == 0)
4973                goto OTHER_CHAR_AFTER_QUERY;                goto OTHER_CHAR_AFTER_QUERY;
# Line 4771  we set the flag only if there is a liter Line 4976  we set the flag only if there is a liter
4976    
4977            recno = 0;            recno = 0;
4978            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4979              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - CHAR_0;
4980    
4981            if (*ptr != terminator)            if (*ptr != terminator)
4982              {              {
# Line 4779  we set the flag only if there is a liter Line 4984  we set the flag only if there is a liter
4984              goto FAILED;              goto FAILED;
4985              }              }
4986    
4987            if (refsign == '-')            if (refsign == CHAR_MINUS)
4988              {              {
4989              if (recno == 0)              if (recno == 0)
4990                {                {
# Line 4793  we set the flag only if there is a liter Line 4998  we set the flag only if there is a liter
4998                goto FAILED;                goto FAILED;
4999                }                }
5000              }              }
5001            else if (refsign == '+')            else if (refsign == CHAR_PLUS)
5002              {              {
5003              if (recno == 0)              if (recno == 0)
5004                {                {
# Line 4826  we set the flag only if there is a liter Line 5031  we set the flag only if there is a liter
5031    
5032              if (called == NULL)              if (called == NULL)
5033                {                {
5034                if (find_parens(ptr, cd, NULL, recno,                if (find_parens(cd, NULL, recno,
5035                      (options & PCRE_EXTENDED) != 0) < 0)                      (options & PCRE_EXTENDED) != 0) < 0)
5036                  {                  {
5037                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
# Line 4879  we set the flag only if there is a liter Line 5084  we set the flag only if there is a liter
5084          set = unset = 0;          set = unset = 0;
5085          optset = &set;          optset = &set;
5086    
5087          while (*ptr != ')' && *ptr != ':')          while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
5088            {            {
5089            switch (*ptr++)            switch (*ptr++)
5090              {              {
5091              case '-': optset = &unset; break;              case CHAR_MINUS: optset = &unset; break;
5092    
5093              case 'J':    /* Record that it changed in the external options */              case CHAR_J:    /* Record that it changed in the external options */
5094              *optset |= PCRE_DUPNAMES;              *optset |= PCRE_DUPNAMES;
5095              cd->external_flags |= PCRE_JCHANGED;              cd->external_flags |= PCRE_JCHANGED;
5096              break;              break;
5097    
5098              case 'i': *optset |= PCRE_CASELESS; break;              case CHAR_i: *optset |= PCRE_CASELESS; break;
5099              case 'm': *optset |= PCRE_MULTILINE; break;              case CHAR_m: *optset |= PCRE_MULTILINE; break;
5100              case 's': *optset |= PCRE_DOTALL; break;              case CHAR_s: *optset |= PCRE_DOTALL; break;
5101              case 'x': *optset |= PCRE_EXTENDED; break;              case CHAR_x: *optset |= PCRE_EXTENDED; break;
5102              case 'U': *optset |= PCRE_UNGREEDY; break;              case CHAR_U: *optset |= PCRE_UNGREEDY; break;
5103              case 'X': *optset |= PCRE_EXTRA; break;              case CHAR_X: *optset |= PCRE_EXTRA; break;
5104    
5105              default:  *errorcodeptr = ERR12;              default:  *errorcodeptr = ERR12;
5106                        ptr--;    /* Correct the offset */                        ptr--;    /* Correct the offset */
# Line 4929  we set the flag only if there is a liter Line 5134  we set the flag only if there is a liter
5134          options if this setting actually changes any of them, and reset the          options if this setting actually changes any of them, and reset the
5135          greedy defaults and the case value for firstbyte and reqbyte. */          greedy defaults and the case value for firstbyte and reqbyte. */
5136    
5137          if (*ptr == ')')          if (*ptr == CHAR_RIGHT_PARENTHESIS)
5138            {            {
5139            if (code == cd->start_code + 1 + LINK_SIZE &&            if (code == cd->start_code + 1 + LINK_SIZE &&
5140                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
# Line 5069  we set the flag only if there is a liter Line 5274  we set the flag only if there is a liter
5274    
5275      /* Error if hit end of pattern */      /* Error if hit end of pattern */
5276    
5277      if (*ptr != ')')      if (*ptr != CHAR_RIGHT_PARENTHESIS)
5278        {        {
5279        *errorcodeptr = ERR14;        *errorcodeptr = ERR14;
5280        goto FAILED;        goto FAILED;
# Line 5167  we set the flag only if there is a liter Line 5372  we set the flag only if there is a liter
5372      We can test for values between ESC_b and ESC_Z for the latter; this may      We can test for values between ESC_b and ESC_Z for the latter; this may
5373      have to change if any new ones are ever created. */      have to change if any new ones are ever created. */
5374    
5375      case '\\':      case CHAR_BACKSLASH:
5376      tempptr = ptr;      tempptr = ptr;
5377      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);
5378      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
# Line 5176  we set the flag only if there is a liter Line 5381  we set the flag only if there is a liter
5381        {        {
5382        if (-c == ESC_Q)            /* Handle start of quoted string */        if (-c == ESC_Q)            /* Handle start of quoted string */
5383          {          {
5384          if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
5385            else inescq = TRUE;            ptr += 2;               /* avoid empty string */
5386                else inescq = TRUE;
5387          continue;          continue;
5388          }          }
5389    
# Line 5205  we set the flag only if there is a liter Line 5411  we set the flag only if there is a liter
5411          {          {
5412          const uschar *p;          const uschar *p;
5413          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
5414          terminator = (*(++ptr) == '<')? '>' : '\'';          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5415              CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
5416    
5417          /* These two statements stop the compiler for warning about possibly          /* These two statements stop the compiler for warning about possibly
5418          unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In          unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
# Line 5217  we set the flag only if there is a liter Line 5424  we set the flag only if there is a liter
5424    
5425          /* Test for a name */          /* Test for a name */
5426    
5427          if (ptr[1] != '+' && ptr[1] != '-')          if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
5428            {            {
5429            BOOL isnumber = TRUE;            BOOL isnumber = TRUE;
5430            for (p = ptr + 1; *p != 0 && *p != terminator; p++)            for (p = ptr + 1; *p != 0 && *p != terminator; p++)
# Line 5255  we set the flag only if there is a liter Line 5462  we set the flag only if there is a liter
5462        /* \k<name> or \k'name' is a back reference by name (Perl syntax).        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
5463        We also support \k{name} (.NET syntax) */        We also support \k{name} (.NET syntax) */
5464    
5465        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))        if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||
5466              ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))
5467          {          {
5468          is_recurse = FALSE;          is_recurse = FALSE;
5469          terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5470              CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
5471              CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
5472          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
5473          }          }
5474    
# Line 5361  we set the flag only if there is a liter Line 5571  we set the flag only if there is a liter
5571    
5572      /* Remember if \r or \n were seen */      /* Remember if \r or \n were seen */
5573    
5574      if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')      if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
5575        cd->external_flags |= PCRE_HASCRORLF;        cd->external_flags |= PCRE_HASCRORLF;
5576    
5577      /* Set the first and required bytes appropriately. If no previous first      /* Set the first and required bytes appropriately. If no previous first
# Line 5606  for (;;) Line 5816  for (;;)
5816    compile a resetting op-code following, except at the very end of the pattern.    compile a resetting op-code following, except at the very end of the pattern.
5817    Return leaving the pointer at the terminating char. */    Return leaving the pointer at the terminating char. */
5818    
5819    if (*ptr != '|')    if (*ptr != CHAR_VERTICAL_LINE)
5820      {      {
5821      if (lengthptr == NULL)      if (lengthptr == NULL)
5822        {        {
# Line 5629  for (;;) Line 5839  for (;;)
5839    
5840      /* Resetting option if needed */      /* Resetting option if needed */
5841    
5842      if ((options & PCRE_IMS) != oldims && *ptr == ')')      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
5843        {        {
5844        *code++ = OP_OPT;        *code++ = OP_OPT;
5845        *code++ = oldims;        *code++ = oldims;
# Line 5810  do { Line 6020  do {
6020     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],     const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],
6021       NULL, 0, FALSE);       NULL, 0, FALSE);
6022     register int op = *scode;     register int op = *scode;
6023    
6024     /* If we are at the start of a conditional assertion group, *both* the     /* If we are at the start of a conditional assertion group, *both* the
6025     conditional assertion *and* what follows the condition must satisfy the test     conditional assertion *and* what follows the condition must satisfy the test
6026     for start of line. Other kinds of condition fail. Note that there may be an     for start of line. Other kinds of condition fail. Note that there may be an
# Line 5818  do { Line 6028  do {
6028    
6029     if (op == OP_COND)     if (op == OP_COND)
6030       {       {
6031       scode += 1 + LINK_SIZE;       scode += 1 + LINK_SIZE;
6032       if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];       if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
6033       switch (*scode)       switch (*scode)
6034         {         {
6035         case OP_CREF:         case OP_CREF:
6036         case OP_RREF:         case OP_RREF:
6037         case OP_DEF:         case OP_DEF:
6038         return FALSE;         return FALSE;
6039    
6040         default:     /* Assertion */         default:     /* Assertion */
6041         if (!is_startline(scode, bracket_map, backref_map)) return FALSE;         if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6042         do scode += GET(scode, 1); while (*scode == OP_ALT);         do scode += GET(scode, 1); while (*scode == OP_ALT);
6043         scode += 1 + LINK_SIZE;         scode += 1 + LINK_SIZE;
6044         break;         break;
6045         }         }
6046       scode = first_significant_code(scode, NULL, 0, FALSE);       scode = first_significant_code(scode, NULL, 0, FALSE);
6047       op = *scode;       op = *scode;
6048       }       }
6049    
6050     /* Non-capturing brackets */     /* Non-capturing brackets */
6051    
# Line 5856  do { Line 6066  do {
6066     /* Other brackets */     /* Other brackets */
6067    
6068     else if (op == OP_ASSERT || op == OP_ONCE)     else if (op == OP_ASSERT || op == OP_ONCE)
6069       {       {
6070       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6071       }       }
6072    
6073     /* .* means "start at start or after \n" if it isn't in brackets that     /* .* means "start at start or after \n" if it isn't in brackets that
# Line 6037  if (erroroffset == NULL) Line 6247  if (erroroffset == NULL)
6247    
6248  *erroroffset = 0;  *erroroffset = 0;
6249    
 /* Can't support UTF8 unless PCRE has been compiled to include the code. */  
   
 #ifdef SUPPORT_UTF8  
 utf8 = (options & PCRE_UTF8) != 0;  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  
      (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)  
   {  
   errorcode = ERR44;  
   goto PCRE_EARLY_ERROR_RETURN2;  
   }  
 #else  
 if ((options & PCRE_UTF8) != 0)  
   {  
   errorcode = ERR32;  
   goto PCRE_EARLY_ERROR_RETURN;  
   }  
 #endif  
   
 if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)  
   {  
   errorcode = ERR17;  
   goto PCRE_EARLY_ERROR_RETURN;  
   }  
   
6250  /* Set up pointers to the individual character tables */  /* Set up pointers to the individual character tables */
6251    
6252  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = _pcre_default_tables;
# Line 6069  cd->fcc = tables + fcc_offset; Line 6255  cd->fcc = tables + fcc_offset;
6255  cd->cbits = tables + cbits_offset;  cd->cbits = tables + cbits_offset;
6256  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
6257    
6258    /* Check that all undefined public option bits are zero */
6259    
6260    if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
6261      {
6262      errorcode = ERR17;
6263      goto PCRE_EARLY_ERROR_RETURN;
6264      }
6265    
6266  /* Check for global one-time settings at the start of the pattern, and remember  /* Check for global one-time settings at the start of the pattern, and remember
6267  the offset for later. */  the offset for later. */
6268    
6269  while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
6270           ptr[skipatstart+1] == CHAR_ASTERISK)
6271    {    {
6272    int newnl = 0;    int newnl = 0;
6273    int newbsr = 0;    int newbsr = 0;
6274    
6275    if (strncmp((char *)(ptr+skipatstart+2), "CR)", 3) == 0)    if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
6276        { skipatstart += 7; options |= PCRE_UTF8; continue; }
6277    
6278      if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
6279      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
6280    else if (strncmp((char *)(ptr+skipatstart+2), "LF)", 3)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)
6281      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
6282    else if (strncmp((char *)(ptr+skipatstart+2), "CRLF)", 5)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)
6283      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
6284    else if (strncmp((char *)(ptr+skipatstart+2), "ANY)", 4) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)
6285      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
6286    else if (strncmp((char *)(ptr+skipatstart+2), "ANYCRLF)", 8)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)
6287      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
6288    
6289    else if (strncmp((char *)(ptr+skipatstart+2), "BSR_ANYCRLF)", 12) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
6290      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
6291    else if (strncmp((char *)(ptr+skipatstart+2), "BSR_UNICODE)", 12) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
6292      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
6293    
6294    if (newnl != 0)    if (newnl != 0)
# Line 6100  while (ptr[skipatstart] == '(' && ptr[sk Line 6298  while (ptr[skipatstart] == '(' && ptr[sk
6298    else break;    else break;
6299    }    }
6300    
6301    /* Can't support UTF8 unless PCRE has been compiled to include the code. */
6302    
6303    #ifdef SUPPORT_UTF8
6304    utf8 = (options & PCRE_UTF8) != 0;
6305    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
6306         (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
6307      {
6308      errorcode = ERR44;
6309      goto PCRE_EARLY_ERROR_RETURN2;
6310      }
6311    #else
6312    if ((options & PCRE_UTF8) != 0)
6313      {
6314      errorcode = ERR32;
6315      goto PCRE_EARLY_ERROR_RETURN;
6316      }
6317    #endif
6318    
6319  /* Check validity of \R options. */  /* Check validity of \R options. */
6320    
6321  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
# Line 6118  current code allows for fixed one- or tw Line 6334  current code allows for fixed one- or tw
6334  switch (options & PCRE_NEWLINE_BITS)  switch (options & PCRE_NEWLINE_BITS)
6335    {    {
6336    case 0: newline = NEWLINE; break;   /* Build-time default */    case 0: newline = NEWLINE; break;   /* Build-time default */
6337    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6338    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6339    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
6340         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6341    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
6342    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6343    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;

Legend:
Removed from v.389  
changed lines
  Added in v.438

  ViewVC Help
Powered by ViewVC 1.1.5