/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 360 by ph10, Wed Jul 9 20:00:28 2008 UTC revision 426 by ph10, Wed Aug 26 15:38:32 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 97  are simple data values; negative values Line 97  are simple data values; negative values
97  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
98  is invalid. */  is invalid. */
99    
100  #ifndef EBCDIC  /* This is the "normal" table for ASCII systems */  #ifndef EBCDIC
101    
102    /* This is the "normal" table for ASCII systems or for EBCDIC systems running
103    in UTF-8 mode. */
104    
105  static const short int escapes[] = {  static const short int escapes[] = {
106       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,                       0,
107       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,                       0,
108     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */       0,                       0,
109  -ESC_H,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */       0,                       0,
110  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0, -ESC_V, -ESC_W,   /* P - W */       0,                       0,
111  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */       CHAR_COLON,              CHAR_SEMICOLON,
112     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */       CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,
113  -ESC_h,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */       CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,
114  -ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0, -ESC_v, -ESC_w,   /* p - w */       CHAR_COMMERCIAL_AT,      -ESC_A,
115       0,      0, -ESC_z                                            /* x - z */       -ESC_B,                  -ESC_C,
116         -ESC_D,                  -ESC_E,
117         0,                       -ESC_G,
118         -ESC_H,                  0,
119         0,                       -ESC_K,
120         0,                       0,
121         0,                       0,
122         -ESC_P,                  -ESC_Q,
123         -ESC_R,                  -ESC_S,
124         0,                       0,
125         -ESC_V,                  -ESC_W,
126         -ESC_X,                  0,
127         -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
128         CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
129         CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
130         CHAR_GRAVE_ACCENT,       7,
131         -ESC_b,                  0,
132         -ESC_d,                  ESC_e,
133         ESC_f,                   0,
134         -ESC_h,                  0,
135         0,                       -ESC_k,
136         0,                       0,
137         ESC_n,                   0,
138         -ESC_p,                  0,
139         ESC_r,                   -ESC_s,
140         ESC_tee,                 0,
141         -ESC_v,                  -ESC_w,
142         0,                       0,
143         -ESC_z
144  };  };
145    
146  #else           /* This is the "abnormal" table for EBCDIC systems */  #else
147    
148    /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
149    
150  static const short int escapes[] = {  static const short int escapes[] = {
151  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
152  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
# Line 142  static const short int escapes[] = { Line 177  static const short int escapes[] = {
177    
178  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is
179  searched linearly. Put all the names into a single string, in order to reduce  searched linearly. Put all the names into a single string, in order to reduce
180  the number of relocations when a shared library is dynamically linked. */  the number of relocations when a shared library is dynamically linked. The
181    string is built from string macros so that it works in UTF-8 mode on EBCDIC
182    platforms. */
183    
184  typedef struct verbitem {  typedef struct verbitem {
185    int   len;    int   len;
# Line 150  typedef struct verbitem { Line 187  typedef struct verbitem {
187  } verbitem;  } verbitem;
188    
189  static const char verbnames[] =  static const char verbnames[] =
190    "ACCEPT\0"    STRING_ACCEPT0
191    "COMMIT\0"    STRING_COMMIT0
192    "F\0"    STRING_F0
193    "FAIL\0"    STRING_FAIL0
194    "PRUNE\0"    STRING_PRUNE0
195    "SKIP\0"    STRING_SKIP0
196    "THEN";    STRING_THEN;
197    
198  static const verbitem verbs[] = {  static const verbitem verbs[] = {
199    { 6, OP_ACCEPT },    { 6, OP_ACCEPT },
# Line 178  length entry. The first three must be al Line 215  length entry. The first three must be al
215  for handling case independence. */  for handling case independence. */
216    
217  static const char posix_names[] =  static const char posix_names[] =
218    "alpha\0"  "lower\0"  "upper\0"  "alnum\0"  "ascii\0"  "blank\0"    STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
219    "cntrl\0"  "digit\0"  "graph\0"  "print\0"  "punct\0"  "space\0"    STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
220    "word\0"   "xdigit";    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
221      STRING_word0  STRING_xdigit;
222    
223  static const uschar posix_name_lengths[] = {  static const uschar posix_name_lengths[] = {
224    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
# Line 322  For convenience, we use the same bit def Line 360  For convenience, we use the same bit def
360    
361  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
362    
363  #ifndef EBCDIC  /* This is the "normal" case, for ASCII systems */  #ifndef EBCDIC
364    
365    /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
366    UTF-8 mode. */
367    
368  static const unsigned char digitab[] =  static const unsigned char digitab[] =
369    {    {
370    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
# Line 358  static const unsigned char digitab[] = Line 400  static const unsigned char digitab[] =
400    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
401    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
402    
403  #else           /* This is the "abnormal" case, for EBCDIC systems */  #else
404    
405    /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
406    
407  static const unsigned char digitab[] =  static const unsigned char digitab[] =
408    {    {
409    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
# Line 455  static const char * Line 500  static const char *
500  find_error_text(int n)  find_error_text(int n)
501  {  {
502  const char *s = error_texts;  const char *s = error_texts;
503  for (; n > 0; n--) while (*s++ != 0);  for (; n > 0; n--) while (*s++ != 0) {};
504  return s;  return s;
505  }  }
506    
# Line 503  if (c == 0) *errorcodeptr = ERR1; Line 548  if (c == 0) *errorcodeptr = ERR1;
548  in a table. A non-zero result is something that can be returned immediately.  in a table. A non-zero result is something that can be returned immediately.
549  Otherwise further processing may be required. */  Otherwise further processing may be required. */
550    
551  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
552  else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */  else if (c < CHAR_0 || c > CHAR_z) {}                     /* Not alphanumeric */
553  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - CHAR_0]) != 0) c = i;
554    
555  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
556  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
# Line 524  else Line 569  else
569      /* A number of Perl escapes are not handled by PCRE. We give an explicit      /* A number of Perl escapes are not handled by PCRE. We give an explicit
570      error. */      error. */
571    
572      case 'l':      case CHAR_l:
573      case 'L':      case CHAR_L:
574      case 'N':      case CHAR_N:
575      case 'u':      case CHAR_u:
576      case 'U':      case CHAR_U:
577      *errorcodeptr = ERR37;      *errorcodeptr = ERR37;
578      break;      break;
579    
# Line 548  else Line 593  else
593      (possibly recursive) subroutine calls, _not_ backreferences. Just return      (possibly recursive) subroutine calls, _not_ backreferences. Just return
594      the -ESC_g code (cf \k). */      the -ESC_g code (cf \k). */
595    
596      case 'g':      case CHAR_g:
597      if (ptr[1] == '<' || ptr[1] == '\'')      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
598        {        {
599        c = -ESC_g;        c = -ESC_g;
600        break;        break;
# Line 557  else Line 602  else
602    
603      /* Handle the Perl-compatible cases */      /* Handle the Perl-compatible cases */
604    
605      if (ptr[1] == '{')      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
606        {        {
607        const uschar *p;        const uschar *p;
608        for (p = ptr+2; *p != 0 && *p != '}'; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
609          if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
610        if (*p != 0 && *p != '}')        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
611          {          {
612          c = -ESC_k;          c = -ESC_k;
613          break;          break;
# Line 572  else Line 617  else
617        }        }
618      else braced = FALSE;      else braced = FALSE;
619    
620      if (ptr[1] == '-')      if (ptr[1] == CHAR_MINUS)
621        {        {
622        negated = TRUE;        negated = TRUE;
623        ptr++;        ptr++;
# Line 581  else Line 626  else
626    
627      c = 0;      c = 0;
628      while ((digitab[ptr[1]] & ctype_digit) != 0)      while ((digitab[ptr[1]] & ctype_digit) != 0)
629        c = c * 10 + *(++ptr) - '0';        c = c * 10 + *(++ptr) - CHAR_0;
630    
631      if (c < 0)   /* Integer overflow */      if (c < 0)   /* Integer overflow */
632        {        {
# Line 589  else Line 634  else
634        break;        break;
635        }        }
636    
637      if (braced && *(++ptr) != '}')      if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
638        {        {
639        *errorcodeptr = ERR57;        *errorcodeptr = ERR57;
640        break;        break;
# Line 626  else Line 671  else
671      value is greater than 377, the least significant 8 bits are taken. Inside a      value is greater than 377, the least significant 8 bits are taken. Inside a
672      character class, \ followed by a digit is always an octal number. */      character class, \ followed by a digit is always an octal number. */
673    
674      case '1': case '2': case '3': case '4': case '5':      case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
675      case '6': case '7': case '8': case '9':      case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
676    
677      if (!isclass)      if (!isclass)
678        {        {
679        oldptr = ptr;        oldptr = ptr;
680        c -= '0';        c -= CHAR_0;
681        while ((digitab[ptr[1]] & ctype_digit) != 0)        while ((digitab[ptr[1]] & ctype_digit) != 0)
682          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - CHAR_0;
683        if (c < 0)    /* Integer overflow */        if (c < 0)    /* Integer overflow */
684          {          {
685          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
# Line 652  else Line 697  else
697      generates a binary zero byte and treats the digit as a following literal.      generates a binary zero byte and treats the digit as a following literal.
698      Thus we have to pull back the pointer by one. */      Thus we have to pull back the pointer by one. */
699    
700      if ((c = *ptr) >= '8')      if ((c = *ptr) >= CHAR_8)
701        {        {
702        ptr--;        ptr--;
703        c = 0;        c = 0;
# Line 665  else Line 710  else
710      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
711      than 3 octal digits. */      than 3 octal digits. */
712    
713      case '0':      case CHAR_0:
714      c -= '0';      c -= CHAR_0;
715      while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')      while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
716          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - CHAR_0;
717      if (!utf8 && c > 255) *errorcodeptr = ERR51;      if (!utf8 && c > 255) *errorcodeptr = ERR51;
718      break;      break;
719    
# Line 676  else Line 721  else
721      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is
722      treated as a data character. */      treated as a data character. */
723    
724      case 'x':      case CHAR_x:
725      if (ptr[1] == '{')      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
726        {        {
727        const uschar *pt = ptr + 2;        const uschar *pt = ptr + 2;
728        int count = 0;        int count = 0;
# Line 686  else Line 731  else
731        while ((digitab[*pt] & ctype_xdigit) != 0)        while ((digitab[*pt] & ctype_xdigit) != 0)
732          {          {
733          register int cc = *pt++;          register int cc = *pt++;
734          if (c == 0 && cc == '0') continue;     /* Leading zeroes */          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
735          count++;          count++;
736    
737  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
738          if (cc >= 'a') cc -= 32;               /* Convert to upper case */          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
739          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
740  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
741          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
742          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
743  #endif  #endif
744          }          }
745    
746        if (*pt == '}')        if (*pt == CHAR_RIGHT_CURLY_BRACKET)
747          {          {
748          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
749          ptr = pt;          ptr = pt;
# Line 714  else Line 759  else
759      c = 0;      c = 0;
760      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
761        {        {
762        int cc;                               /* Some compilers don't like ++ */        int cc;                                  /* Some compilers don't like */
763        cc = *(++ptr);                        /* in initializers */        cc = *(++ptr);                           /* ++ in initializers */
764  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
765        if (cc >= 'a') cc -= 32;              /* Convert to upper case */        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
766        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
767  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
768        if (cc <= 'z') cc += 64;              /* Convert to upper case */        if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */
769        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
770  #endif  #endif
771        }        }
772      break;      break;
# Line 730  else Line 775  else
775      This coding is ASCII-specific, but then the whole concept of \cx is      This coding is ASCII-specific, but then the whole concept of \cx is
776      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
777    
778      case 'c':      case CHAR_c:
779      c = *(++ptr);      c = *(++ptr);
780      if (c == 0)      if (c == 0)
781        {        {
# Line 738  else Line 783  else
783        break;        break;
784        }        }
785    
786  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
787      if (c >= 'a' && c <= 'z') c -= 32;      if (c >= CHAR_a && c <= CHAR_z) c -= 32;
788      c ^= 0x40;      c ^= 0x40;
789  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
790      if (c >= 'a' && c <= 'z') c += 64;      if (c >= CHAR_a && c <= CHAR_z) c += 64;
791      c ^= 0xC0;      c ^= 0xC0;
792  #endif  #endif
793      break;      break;
# Line 804  if (c == 0) goto ERROR_RETURN; Line 849  if (c == 0) goto ERROR_RETURN;
849  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for
850  negation. */  negation. */
851    
852  if (c == '{')  if (c == CHAR_LEFT_CURLY_BRACKET)
853    {    {
854    if (ptr[1] == '^')    if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
855      {      {
856      *negptr = TRUE;      *negptr = TRUE;
857      ptr++;      ptr++;
# Line 815  if (c == '{') Line 860  if (c == '{')
860      {      {
861      c = *(++ptr);      c = *(++ptr);
862      if (c == 0) goto ERROR_RETURN;      if (c == 0) goto ERROR_RETURN;
863      if (c == '}') break;      if (c == CHAR_RIGHT_CURLY_BRACKET) break;
864      name[i] = c;      name[i] = c;
865      }      }
866    if (c !='}') goto ERROR_RETURN;    if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
867    name[i] = 0;    name[i] = 0;
868    }    }
869    
# Line 883  is_counted_repeat(const uschar *p) Line 928  is_counted_repeat(const uschar *p)
928  {  {
929  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
930  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
931  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
932    
933  if (*p++ != ',') return FALSE;  if (*p++ != CHAR_COMMA) return FALSE;
934  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
935    
936  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
937  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
938    
939  return (*p == '}');  return (*p == CHAR_RIGHT_CURLY_BRACKET);
940  }  }
941    
942    
# Line 924  int max = -1; Line 969  int max = -1;
969  /* Read the minimum value and do a paranoid check: a negative value indicates  /* Read the minimum value and do a paranoid check: a negative value indicates
970  an integer overflow. */  an integer overflow. */
971    
972  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;
973  if (min < 0 || min > 65535)  if (min < 0 || min > 65535)
974    {    {
975    *errorcodeptr = ERR5;    *errorcodeptr = ERR5;
# Line 934  if (min < 0 || min > 65535) Line 979  if (min < 0 || min > 65535)
979  /* Read the maximum value if there is one, and again do a paranoid check on its size.  /* Read the maximum value if there is one, and again do a paranoid check on its size.
980  Also, max must not be less than min. */  Also, max must not be less than min. */
981    
982  if (*p == '}') max = min; else  if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
983    {    {
984    if (*(++p) != '}')    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
985      {      {
986      max = 0;      max = 0;
987      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;
988      if (max < 0 || max > 65535)      if (max < 0 || max > 65535)
989        {        {
990        *errorcodeptr = ERR5;        *errorcodeptr = ERR5;
# Line 964  return p; Line 1009  return p;
1009    
1010    
1011  /*************************************************  /*************************************************
1012  *       Find forward referenced subpattern       *  *  Subroutine for finding forward reference      *
1013  *************************************************/  *************************************************/
1014    
1015  /* This function scans along a pattern's text looking for capturing  /* This recursive function is called only from find_parens() below. The
1016    top-level call starts at the beginning of the pattern. All other calls must
1017    start at a parenthesis. It scans along a pattern's text looking for capturing
1018  subpatterns, and counting them. If it finds a named pattern that matches the  subpatterns, and counting them. If it finds a named pattern that matches the
1019  name it is given, it returns its number. Alternatively, if the name is NULL, it  name it is given, it returns its number. Alternatively, if the name is NULL, it
1020  returns when it reaches a given numbered subpattern. This is used for forward  returns when it reaches a given numbered subpattern. We know that if (?P< is
1021  references to subpatterns. We know that if (?P< is encountered, the name will  encountered, the name will be terminated by '>' because that is checked in the
1022  be terminated by '>' because that is checked in the first pass.  first pass. Recursion is used to keep track of subpatterns that reset the
1023    capturing group numbers - the (?| feature.
1024    
1025  Arguments:  Arguments:
1026    ptr          current position in the pattern    ptrptr       address of the current character pointer (updated)
1027    cd           compile background data    cd           compile background data
1028    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1029    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1030    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1031      count        pointer to the current capturing subpattern number (updated)
1032    
1033  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
1034  */  */
1035    
1036  static int  static int
1037  find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
1038    BOOL xmode)    BOOL xmode, int *count)
1039  {  {
1040  const uschar *thisname;  uschar *ptr = *ptrptr;
1041  int count = cd->bracount;  int start_count = *count;
1042    int hwm_count = start_count;
1043    BOOL dup_parens = FALSE;
1044    
1045  for (; *ptr != 0; ptr++)  /* If the first character is a parenthesis, check on the type of group we are
1046    dealing with. The very first call may not start with a parenthesis. */
1047    
1048    if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1049    {    {
1050    int term;    if (ptr[1] == CHAR_QUESTION_MARK &&
1051          ptr[2] == CHAR_VERTICAL_LINE)
1052        {
1053        ptr += 3;
1054        dup_parens = TRUE;
1055        }
1056    
1057      /* Handle a normal, unnamed capturing parenthesis */
1058    
1059      else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
1060        {
1061        *count += 1;
1062        if (name == NULL && *count == lorn) return *count;
1063        ptr++;
1064        }
1065    
1066      /* Handle a condition. If it is an assertion, just carry on so that it
1067      is processed as normal. If not, skip to the closing parenthesis of the
1068      condition (there can't be any nested parens). */
1069    
1070      else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
1071        {
1072        ptr += 2;
1073        if (ptr[1] != CHAR_QUESTION_MARK)
1074          {
1075          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1076          if (*ptr != 0) ptr++;
1077          }
1078        }
1079    
1080      /* We have either (? or (* and not a condition */
1081    
1082      else
1083        {
1084        ptr += 2;
1085        if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */
1086    
1087        /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
1088    
1089        if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1090            ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1091          {
1092          int term;
1093          const uschar *thisname;
1094          *count += 1;
1095          if (name == NULL && *count == lorn) return *count;
1096          term = *ptr++;
1097          if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
1098          thisname = ptr;
1099          while (*ptr != term) ptr++;
1100          if (name != NULL && lorn == ptr - thisname &&
1101              strncmp((const char *)name, (const char *)thisname, lorn) == 0)
1102            return *count;
1103          }
1104        }
1105      }
1106    
1107    /* Past any initial parenthesis handling, scan for parentheses or vertical
1108    bars. */
1109    
1110    for (; *ptr != 0; ptr++)
1111      {
1112    /* Skip over backslashed characters and also entire \Q...\E */    /* Skip over backslashed characters and also entire \Q...\E */
1113    
1114    if (*ptr == '\\')    if (*ptr == CHAR_BACKSLASH)
1115      {      {
1116      if (*(++ptr) == 0) return -1;      if (*(++ptr) == 0) goto FAIL_EXIT;
1117      if (*ptr == 'Q') for (;;)      if (*ptr == CHAR_Q) for (;;)
1118        {        {
1119        while (*(++ptr) != 0 && *ptr != '\\');        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1120        if (*ptr == 0) return -1;        if (*ptr == 0) goto FAIL_EXIT;
1121        if (*(++ptr) == 'E') break;        if (*(++ptr) == CHAR_E) break;
1122        }        }
1123      continue;      continue;
1124      }      }
# Line 1012  for (; *ptr != 0; ptr++) Line 1126  for (; *ptr != 0; ptr++)
1126    /* Skip over character classes; this logic must be similar to the way they    /* Skip over character classes; this logic must be similar to the way they
1127    are handled for real. If the first character is '^', skip it. Also, if the    are handled for real. If the first character is '^', skip it. Also, if the
1128    first few characters (either before or after ^) are \Q\E or \E we skip them    first few characters (either before or after ^) are \Q\E or \E we skip them
1129    too. This makes for compatibility with Perl. */    too. This makes for compatibility with Perl. Note the use of STR macros to
1130      encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */
1131    
1132    if (*ptr == '[')    if (*ptr == CHAR_LEFT_SQUARE_BRACKET)
1133      {      {
1134      BOOL negate_class = FALSE;      BOOL negate_class = FALSE;
1135      for (;;)      for (;;)
1136        {        {
1137        int c = *(++ptr);        int c = *(++ptr);
1138        if (c == '\\')        if (c == CHAR_BACKSLASH)
1139          {          {
1140          if (ptr[1] == 'E') ptr++;          if (ptr[1] == CHAR_E)
1141            else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;            ptr++;
1142              else break;          else if (strncmp((const char *)ptr+1,
1143                     STR_Q STR_BACKSLASH STR_E, 3) == 0)
1144              ptr += 3;
1145            else
1146              break;
1147          }          }
1148        else if (!negate_class && c == '^')        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
1149          negate_class = TRUE;          negate_class = TRUE;
1150        else break;        else break;
1151        }        }
# Line 1034  for (; *ptr != 0; ptr++) Line 1153  for (; *ptr != 0; ptr++)
1153      /* If the next character is ']', it is a data character that must be      /* If the next character is ']', it is a data character that must be
1154      skipped, except in JavaScript compatibility mode. */      skipped, except in JavaScript compatibility mode. */
1155    
1156      if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)      if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&
1157            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
1158        ptr++;        ptr++;
1159    
1160      while (*(++ptr) != ']')      while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
1161        {        {
1162        if (*ptr == 0) return -1;        if (*ptr == 0) return -1;
1163        if (*ptr == '\\')        if (*ptr == CHAR_BACKSLASH)
1164          {          {
1165          if (*(++ptr) == 0) return -1;          if (*(++ptr) == 0) goto FAIL_EXIT;
1166          if (*ptr == 'Q') for (;;)          if (*ptr == CHAR_Q) for (;;)
1167            {            {
1168            while (*(++ptr) != 0 && *ptr != '\\');            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1169            if (*ptr == 0) return -1;            if (*ptr == 0) goto FAIL_EXIT;
1170            if (*(++ptr) == 'E') break;            if (*(++ptr) == CHAR_E) break;
1171            }            }
1172          continue;          continue;
1173          }          }
# Line 1057  for (; *ptr != 0; ptr++) Line 1177  for (; *ptr != 0; ptr++)
1177    
1178    /* Skip comments in /x mode */    /* Skip comments in /x mode */
1179    
1180    if (xmode && *ptr == '#')    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1181      {      {
1182      while (*(++ptr) != 0 && *ptr != '\n');      while (*(++ptr) != 0 && *ptr != CHAR_NL) {};
1183      if (*ptr == 0) return -1;      if (*ptr == 0) goto FAIL_EXIT;
1184      continue;      continue;
1185      }      }
1186    
1187    /* An opening parens must now be a real metacharacter */    /* Check for the special metacharacters */
1188    
1189    if (*ptr != '(') continue;    if (*ptr == CHAR_LEFT_PARENTHESIS)
   if (ptr[1] != '?' && ptr[1] != '*')  
1190      {      {
1191      count++;      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
1192      if (name == NULL && count == lorn) return count;      if (rc > 0) return rc;
1193      continue;      if (*ptr == 0) goto FAIL_EXIT;
1194      }      }
1195    
1196    ptr += 2;    else if (*ptr == CHAR_RIGHT_PARENTHESIS)
1197    if (*ptr == 'P') ptr++;                      /* Allow optional P */      {
1198        if (dup_parens && *count < hwm_count) *count = hwm_count;
1199        *ptrptr = ptr;
1200        return -1;
1201        }
1202    
1203    /* We have to disambiguate (?<! and (?<= from (?<name> */    else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
1204        {
1205        if (*count > hwm_count) hwm_count = *count;
1206        *count = start_count;
1207        }
1208      }
1209    
1210    if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&  FAIL_EXIT:
1211         *ptr != '\'')  *ptrptr = ptr;
1212      continue;  return -1;
1213    }
1214    
1215    
1216    
1217    
1218    /*************************************************
1219    *       Find forward referenced subpattern       *
1220    *************************************************/
1221    
1222    count++;  /* This function scans along a pattern's text looking for capturing
1223    subpatterns, and counting them. If it finds a named pattern that matches the
1224    name it is given, it returns its number. Alternatively, if the name is NULL, it
1225    returns when it reaches a given numbered subpattern. This is used for forward
1226    references to subpatterns. We used to be able to start this scan from the
1227    current compiling point, using the current count value from cd->bracount, and
1228    do it all in a single loop, but the addition of the possibility of duplicate
1229    subpattern numbers means that we have to scan from the very start, in order to
1230    take account of such duplicates, and to use a recursive function to keep track
1231    of the different types of group.
1232    
1233    if (name == NULL && count == lorn) return count;  Arguments:
1234    term = *ptr++;    cd           compile background data
1235    if (term == '<') term = '>';    name         name to seek, or NULL if seeking a numbered subpattern
1236    thisname = ptr;    lorn         name length, or subpattern number if name is NULL
1237    while (*ptr != term) ptr++;    xmode        TRUE if we are in /x mode
1238    if (name != NULL && lorn == ptr - thisname &&  
1239        strncmp((const char *)name, (const char *)thisname, lorn) == 0)  Returns:       the number of the found subpattern, or -1 if not found
1240      return count;  */
1241    
1242    static int
1243    find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)
1244    {
1245    uschar *ptr = (uschar *)cd->start_pattern;
1246    int count = 0;
1247    int rc;
1248    
1249    /* If the pattern does not start with an opening parenthesis, the first call
1250    to find_parens_sub() will scan right to the end (if necessary). However, if it
1251    does start with a parenthesis, find_parens_sub() will return when it hits the
1252    matching closing parens. That is why we have to have a loop. */
1253    
1254    for (;;)
1255      {
1256      rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
1257      if (rc > 0 || *ptr++ == 0) break;
1258    }    }
1259    
1260  return -1;  return rc;
1261  }  }
1262    
1263    
1264    
1265    
1266  /*************************************************  /*************************************************
1267  *      Find first significant op code            *  *      Find first significant op code            *
1268  *************************************************/  *************************************************/
# Line 1260  for (;;) Line 1423  for (;;)
1423      branchlength++;      branchlength++;
1424      cc += 2;      cc += 2;
1425  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1426      if ((options & PCRE_UTF8) != 0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1427        {        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
       while ((*cc & 0xc0) == 0x80) cc++;  
       }  
1428  #endif  #endif
1429      break;      break;
1430    
# Line 1274  for (;;) Line 1435  for (;;)
1435      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1436      cc += 4;      cc += 4;
1437  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1438      if ((options & PCRE_UTF8) != 0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1439        {        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
       while((*cc & 0x80) == 0x80) cc++;  
       }  
1440  #endif  #endif
1441      break;      break;
1442    
# Line 1450  for (;;) Line 1609  for (;;)
1609        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1610        break;        break;
1611        }        }
1612    #else
1613        (void)(utf8);  /* Keep compiler happy by referencing function argument */
1614  #endif  #endif
1615      }      }
1616    }    }
# Line 1543  for (;;) Line 1704  for (;;)
1704        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1705        break;        break;
1706        }        }
1707    #else
1708        (void)(utf8);  /* Keep compiler happy by referencing function argument */
1709  #endif  #endif
1710      }      }
1711    }    }
# Line 1609  for (code = first_significant_code(code Line 1772  for (code = first_significant_code(code
1772      BOOL empty_branch;      BOOL empty_branch;
1773      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
1774    
1775      /* Scan a closed bracket */      /* If a conditional group has only one branch, there is a second, implied,
1776        empty branch, so just skip over the conditional, because it could be empty.
1777        Otherwise, scan the individual branches of the group. */
1778    
1779      empty_branch = FALSE;      if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
     do  
       {  
       if (!empty_branch && could_be_empty_branch(code, endcode, utf8))  
         empty_branch = TRUE;  
1780        code += GET(code, 1);        code += GET(code, 1);
1781        else
1782          {
1783          empty_branch = FALSE;
1784          do
1785            {
1786            if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
1787              empty_branch = TRUE;
1788            code += GET(code, 1);
1789            }
1790          while (*code == OP_ALT);
1791          if (!empty_branch) return FALSE;   /* All branches are non-empty */
1792        }        }
1793      while (*code == OP_ALT);  
     if (!empty_branch) return FALSE;   /* All branches are non-empty */  
1794      c = *code;      c = *code;
1795      continue;      continue;
1796      }      }
# Line 1737  for (code = first_significant_code(code Line 1908  for (code = first_significant_code(code
1908      case OP_QUERY:      case OP_QUERY:
1909      case OP_MINQUERY:      case OP_MINQUERY:
1910      case OP_POSQUERY:      case OP_POSQUERY:
1911        if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
1912        break;
1913    
1914      case OP_UPTO:      case OP_UPTO:
1915      case OP_MINUPTO:      case OP_MINUPTO:
1916      case OP_POSUPTO:      case OP_POSUPTO:
1917      if (utf8) while ((code[2] & 0xc0) == 0x80) code++;      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
1918      break;      break;
1919  #endif  #endif
1920      }      }
# Line 1821  int terminator;          /* Don't combin Line 1995  int terminator;          /* Don't combin
1995  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
1996  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != 0; ptr++)
1997    {    {
1998    if (*ptr == '\\' && ptr[1] == ']') ptr++; else    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else
1999      {      {
2000      if (*ptr == ']') return FALSE;      if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
2001      if (*ptr == terminator && ptr[1] == ']')      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2002        {        {
2003        *endptr = ptr;        *endptr = ptr;
2004        return TRUE;        return TRUE;
# Line 2070  if ((options & PCRE_EXTENDED) != 0) Line 2244  if ((options & PCRE_EXTENDED) != 0)
2244    for (;;)    for (;;)
2245      {      {
2246      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2247      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2248        {        {
2249        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2250          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 2082  if ((options & PCRE_EXTENDED) != 0) Line 2256  if ((options & PCRE_EXTENDED) != 0)
2256  /* If the next item is one that we can handle, get its value. A non-negative  /* If the next item is one that we can handle, get its value. A non-negative
2257  value is a character, a negative value is an escape value. */  value is a character, a negative value is an escape value. */
2258    
2259  if (*ptr == '\\')  if (*ptr == CHAR_BACKSLASH)
2260    {    {
2261    int temperrorcode = 0;    int temperrorcode = 0;
2262    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);
# Line 2107  if ((options & PCRE_EXTENDED) != 0) Line 2281  if ((options & PCRE_EXTENDED) != 0)
2281    for (;;)    for (;;)
2282      {      {
2283      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2284      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2285        {        {
2286        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2287          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 2118  if ((options & PCRE_EXTENDED) != 0) Line 2292  if ((options & PCRE_EXTENDED) != 0)
2292    
2293  /* If the next thing is itself optional, we have to give up. */  /* If the next thing is itself optional, we have to give up. */
2294    
2295  if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2296    return FALSE;    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2297        return FALSE;
2298    
2299  /* Now compare the next item with the previous opcode. If the previous is a  /* Now compare the next item with the previous opcode. If the previous is a
2300  positive single character match, "item" either contains the character or, if  positive single character match, "item" either contains the character or, if
# Line 2134  if (next >= 0) switch(op_code) Line 2309  if (next >= 0) switch(op_code)
2309    case OP_CHAR:    case OP_CHAR:
2310  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2311    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
2312    #else
2313      (void)(utf8_char);  /* Keep compiler happy by referencing function argument */
2314  #endif  #endif
2315    return item != next;    return item != next;
2316    
# Line 2555  for (;; ptr++) Line 2732  for (;; ptr++)
2732    
2733    if (inescq && c != 0)    if (inescq && c != 0)
2734      {      {
2735      if (c == '\\' && ptr[1] == 'E')      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
2736        {        {
2737        inescq = FALSE;        inescq = FALSE;
2738        ptr++;        ptr++;
# Line 2581  for (;; ptr++) Line 2758  for (;; ptr++)
2758    /* Fill in length of a previous callout, except when the next thing is    /* Fill in length of a previous callout, except when the next thing is
2759    a quantifier. */    a quantifier. */
2760    
2761    is_quantifier = c == '*' || c == '+' || c == '?' ||    is_quantifier =
2762      (c == '{' && is_counted_repeat(ptr+1));      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
2763        (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
2764    
2765    if (!is_quantifier && previous_callout != NULL &&    if (!is_quantifier && previous_callout != NULL &&
2766         after_manual_callout-- <= 0)         after_manual_callout-- <= 0)
# Line 2597  for (;; ptr++) Line 2775  for (;; ptr++)
2775    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
2776      {      {
2777      if ((cd->ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
2778      if (c == '#')      if (c == CHAR_NUMBER_SIGN)
2779        {        {
2780        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2781          {          {
# Line 2622  for (;; ptr++) Line 2800  for (;; ptr++)
2800      {      {
2801      /* ===================================================================*/      /* ===================================================================*/
2802      case 0:                        /* The branch terminates at string end */      case 0:                        /* The branch terminates at string end */
2803      case '|':                      /* or | or ) */      case CHAR_VERTICAL_LINE:       /* or | or ) */
2804      case ')':      case CHAR_RIGHT_PARENTHESIS:
2805      *firstbyteptr = firstbyte;      *firstbyteptr = firstbyte;
2806      *reqbyteptr = reqbyte;      *reqbyteptr = reqbyte;
2807      *codeptr = code;      *codeptr = code;
# Line 2645  for (;; ptr++) Line 2823  for (;; ptr++)
2823      /* Handle single-character metacharacters. In multiline mode, ^ disables      /* Handle single-character metacharacters. In multiline mode, ^ disables
2824      the setting of any following char as a first character. */      the setting of any following char as a first character. */
2825    
2826      case '^':      case CHAR_CIRCUMFLEX_ACCENT:
2827      if ((options & PCRE_MULTILINE) != 0)      if ((options & PCRE_MULTILINE) != 0)
2828        {        {
2829        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
# Line 2654  for (;; ptr++) Line 2832  for (;; ptr++)
2832      *code++ = OP_CIRC;      *code++ = OP_CIRC;
2833      break;      break;
2834    
2835      case '$':      case CHAR_DOLLAR_SIGN:
2836      previous = NULL;      previous = NULL;
2837      *code++ = OP_DOLL;      *code++ = OP_DOLL;
2838      break;      break;
# Line 2662  for (;; ptr++) Line 2840  for (;; ptr++)
2840      /* There can never be a first char if '.' is first, whatever happens about      /* There can never be a first char if '.' is first, whatever happens about
2841      repeats. The value of reqbyte doesn't change either. */      repeats. The value of reqbyte doesn't change either. */
2842    
2843      case '.':      case CHAR_DOT:
2844      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
2845      zerofirstbyte = firstbyte;      zerofirstbyte = firstbyte;
2846      zeroreqbyte = reqbyte;      zeroreqbyte = reqbyte;
# Line 2686  for (;; ptr++) Line 2864  for (;; ptr++)
2864      In JavaScript compatibility mode, an isolated ']' causes an error. In      In JavaScript compatibility mode, an isolated ']' causes an error. In
2865      default (Perl) mode, it is treated as a data character. */      default (Perl) mode, it is treated as a data character. */
2866    
2867      case ']':      case CHAR_RIGHT_SQUARE_BRACKET:
2868      if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)      if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2869        {        {
2870        *errorcodeptr = ERR64;        *errorcodeptr = ERR64;
# Line 2694  for (;; ptr++) Line 2872  for (;; ptr++)
2872        }        }
2873      goto NORMAL_CHAR;      goto NORMAL_CHAR;
2874    
2875      case '[':      case CHAR_LEFT_SQUARE_BRACKET:
2876      previous = code;      previous = code;
2877    
2878      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
2879      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
2880    
2881      if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&      if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
2882             ptr[1] == CHAR_EQUALS_SIGN) &&
2883          check_posix_syntax(ptr, &tempptr))          check_posix_syntax(ptr, &tempptr))
2884        {        {
2885        *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;        *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
2886        goto FAILED;        goto FAILED;
2887        }        }
2888    
# Line 2715  for (;; ptr++) Line 2894  for (;; ptr++)
2894      for (;;)      for (;;)
2895        {        {
2896        c = *(++ptr);        c = *(++ptr);
2897        if (c == '\\')        if (c == CHAR_BACKSLASH)
2898          {          {
2899          if (ptr[1] == 'E') ptr++;          if (ptr[1] == CHAR_E)
2900            else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;            ptr++;
2901              else break;          else if (strncmp((const char *)ptr+1,
2902                              STR_Q STR_BACKSLASH STR_E, 3) == 0)
2903              ptr += 3;
2904            else
2905              break;
2906          }          }
2907        else if (!negate_class && c == '^')        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
2908          negate_class = TRUE;          negate_class = TRUE;
2909        else break;        else break;
2910        }        }
# Line 2731  for (;; ptr++) Line 2914  for (;; ptr++)
2914      that. In JS mode, [] must always fail, so generate OP_FAIL, whereas      that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
2915      [^] must match any character, so generate OP_ALLANY. */      [^] must match any character, so generate OP_ALLANY. */
2916    
2917      if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)      if (c == CHAR_RIGHT_SQUARE_BRACKET &&
2918            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2919        {        {
2920        *code++ = negate_class? OP_ALLANY : OP_FAIL;        *code++ = negate_class? OP_ALLANY : OP_FAIL;
2921        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
# Line 2796  for (;; ptr++) Line 2980  for (;; ptr++)
2980    
2981        if (inescq)        if (inescq)
2982          {          {
2983          if (c == '\\' && ptr[1] == 'E')     /* If we are at \E */          if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)  /* If we are at \E */
2984            {            {
2985            inescq = FALSE;                   /* Reset literal state */            inescq = FALSE;                   /* Reset literal state */
2986            ptr++;                            /* Skip the 'E' */            ptr++;                            /* Skip the 'E' */
# Line 2811  for (;; ptr++) Line 2995  for (;; ptr++)
2995        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
2996        5.6 and 5.8 do. */        5.6 and 5.8 do. */
2997    
2998        if (c == '[' &&        if (c == CHAR_LEFT_SQUARE_BRACKET &&
2999            (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&            (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
3000            check_posix_syntax(ptr, &tempptr))             ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
3001          {          {
3002          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3003          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3004          register const uschar *cbits = cd->cbits;          register const uschar *cbits = cd->cbits;
3005          uschar pbits[32];          uschar pbits[32];
3006    
3007          if (ptr[1] != ':')          if (ptr[1] != CHAR_COLON)
3008            {            {
3009            *errorcodeptr = ERR31;            *errorcodeptr = ERR31;
3010            goto FAILED;            goto FAILED;
3011            }            }
3012    
3013          ptr += 2;          ptr += 2;
3014          if (*ptr == '^')          if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
3015            {            {
3016            local_negate = TRUE;            local_negate = TRUE;
3017            should_flip_negation = TRUE;  /* Note negative special */            should_flip_negation = TRUE;  /* Note negative special */
# Line 2900  for (;; ptr++) Line 3084  for (;; ptr++)
3084        to 'or' into the one we are building. We assume they have more than one        to 'or' into the one we are building. We assume they have more than one
3085        character in them, so set class_charcount bigger than one. */        character in them, so set class_charcount bigger than one. */
3086    
3087        if (c == '\\')        if (c == CHAR_BACKSLASH)
3088          {          {
3089          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3090          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
3091    
3092          if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */          if (-c == ESC_b) c = CHAR_BS;       /* \b is backspace in a class */
3093          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */          else if (-c == ESC_X) c = CHAR_X;   /* \X is literal X in a class */
3094          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */          else if (-c == ESC_R) c = CHAR_R;   /* \R is literal R in a class */
3095          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
3096            {            {
3097            if (ptr[1] == '\\' && ptr[2] == 'E')            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3098              {              {
3099              ptr += 2; /* avoid empty string */              ptr += 2; /* avoid empty string */
3100              }              }
# Line 3136  for (;; ptr++) Line 3320  for (;; ptr++)
3320        entirely. The code for handling \Q and \E is messy. */        entirely. The code for handling \Q and \E is messy. */
3321    
3322        CHECK_RANGE:        CHECK_RANGE:
3323        while (ptr[1] == '\\' && ptr[2] == 'E')        while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3324          {          {
3325          inescq = FALSE;          inescq = FALSE;
3326          ptr += 2;          ptr += 2;
# Line 3146  for (;; ptr++) Line 3330  for (;; ptr++)
3330    
3331        /* Remember \r or \n */        /* Remember \r or \n */
3332    
3333        if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF;        if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3334    
3335        /* Check for range */        /* Check for range */
3336    
3337        if (!inescq && ptr[1] == '-')        if (!inescq && ptr[1] == CHAR_MINUS)
3338          {          {
3339          int d;          int d;
3340          ptr += 2;          ptr += 2;
3341          while (*ptr == '\\' && ptr[1] == 'E') ptr += 2;          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
3342    
3343          /* If we hit \Q (not followed by \E) at this point, go into escaped          /* If we hit \Q (not followed by \E) at this point, go into escaped
3344          mode. */          mode. */
3345    
3346          while (*ptr == '\\' && ptr[1] == 'Q')          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
3347            {            {
3348            ptr += 2;            ptr += 2;
3349            if (*ptr == '\\' && ptr[1] == 'E') { ptr += 2; continue; }            if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
3350                { ptr += 2; continue; }
3351            inescq = TRUE;            inescq = TRUE;
3352            break;            break;
3353            }            }
3354    
3355          if (*ptr == 0 || (!inescq && *ptr == ']'))          if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
3356            {            {
3357            ptr = oldptr;            ptr = oldptr;
3358            goto LONE_SINGLE_CHARACTER;            goto LONE_SINGLE_CHARACTER;
# Line 3186  for (;; ptr++) Line 3371  for (;; ptr++)
3371          not any of the other escapes. Perl 5.6 treats a hyphen as a literal          not any of the other escapes. Perl 5.6 treats a hyphen as a literal
3372          in such circumstances. */          in such circumstances. */
3373    
3374          if (!inescq && d == '\\')          if (!inescq && d == CHAR_BACKSLASH)
3375            {            {
3376            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3377            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
# Line 3196  for (;; ptr++) Line 3381  for (;; ptr++)
3381    
3382            if (d < 0)            if (d < 0)
3383              {              {
3384              if (d == -ESC_b) d = '\b';              if (d == -ESC_b) d = CHAR_BS;
3385              else if (d == -ESC_X) d = 'X';              else if (d == -ESC_X) d = CHAR_X;
3386              else if (d == -ESC_R) d = 'R'; else              else if (d == -ESC_R) d = CHAR_R; else
3387                {                {
3388                ptr = oldptr;                ptr = oldptr;
3389                goto LONE_SINGLE_CHARACTER;  /* A few lines below */                goto LONE_SINGLE_CHARACTER;  /* A few lines below */
# Line 3219  for (;; ptr++) Line 3404  for (;; ptr++)
3404    
3405          /* Remember \r or \n */          /* Remember \r or \n */
3406    
3407          if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF;          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3408    
3409          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
3410          matching, we have to use an XCLASS with extra data items. Caseless          matching, we have to use an XCLASS with extra data items. Caseless
# Line 3366  for (;; ptr++) Line 3551  for (;; ptr++)
3551    
3552      /* Loop until ']' reached. This "while" is the end of the "do" above. */      /* Loop until ']' reached. This "while" is the end of the "do" above. */
3553    
3554      while ((c = *(++ptr)) != 0 && (c != ']' || inescq));      while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
3555    
3556      if (c == 0)                          /* Missing terminating ']' */      if (c == 0)                          /* Missing terminating ']' */
3557        {        {
# Line 3511  we set the flag only if there is a liter Line 3696  we set the flag only if there is a liter
3696      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
3697      has been tested above. */      has been tested above. */
3698    
3699      case '{':      case CHAR_LEFT_CURLY_BRACKET:
3700      if (!is_quantifier) goto NORMAL_CHAR;      if (!is_quantifier) goto NORMAL_CHAR;
3701      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
3702      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
3703      goto REPEAT;      goto REPEAT;
3704    
3705      case '*':      case CHAR_ASTERISK:
3706      repeat_min = 0;      repeat_min = 0;
3707      repeat_max = -1;      repeat_max = -1;
3708      goto REPEAT;      goto REPEAT;
3709    
3710      case '+':      case CHAR_PLUS:
3711      repeat_min = 1;      repeat_min = 1;
3712      repeat_max = -1;      repeat_max = -1;
3713      goto REPEAT;      goto REPEAT;
3714    
3715      case '?':      case CHAR_QUESTION_MARK:
3716      repeat_min = 0;      repeat_min = 0;
3717      repeat_max = 1;      repeat_max = 1;
3718    
# Line 3562  we set the flag only if there is a liter Line 3747  we set the flag only if there is a liter
3747      but if PCRE_UNGREEDY is set, it works the other way round. We change the      but if PCRE_UNGREEDY is set, it works the other way round. We change the
3748      repeat type to the non-default. */      repeat type to the non-default. */
3749    
3750      if (ptr[1] == '+')      if (ptr[1] == CHAR_PLUS)
3751        {        {
3752        repeat_type = 0;                  /* Force greedy */        repeat_type = 0;                  /* Force greedy */
3753        possessive_quantifier = TRUE;        possessive_quantifier = TRUE;
3754        ptr++;        ptr++;
3755        }        }
3756      else if (ptr[1] == '?')      else if (ptr[1] == CHAR_QUESTION_MARK)
3757        {        {
3758        repeat_type = greedy_non_default;        repeat_type = greedy_non_default;
3759        ptr++;        ptr++;
# Line 3683  we set the flag only if there is a liter Line 3868  we set the flag only if there is a liter
3868    
3869        if (repeat_max == 0) goto END_REPEAT;        if (repeat_max == 0) goto END_REPEAT;
3870    
3871          /*--------------------------------------------------------------------*/
3872          /* This code is obsolete from release 8.00; the restriction was finally
3873          removed: */
3874    
3875        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
3876        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
3877    
3878        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
3879          /*--------------------------------------------------------------------*/
3880    
3881        /* Combine the op_type with the repeat_type */        /* Combine the op_type with the repeat_type */
3882    
# Line 3833  we set the flag only if there is a liter Line 4023  we set the flag only if there is a liter
4023          goto END_REPEAT;          goto END_REPEAT;
4024          }          }
4025    
4026          /*--------------------------------------------------------------------*/
4027          /* This code is obsolete from release 8.00; the restriction was finally
4028          removed: */
4029    
4030        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
4031        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
4032    
4033        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
4034          /*--------------------------------------------------------------------*/
4035    
4036        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
4037          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 4151  we set the flag only if there is a liter Line 4346  we set the flag only if there is a liter
4346      if (possessive_quantifier)      if (possessive_quantifier)
4347        {        {
4348        int len;        int len;
4349        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||  
4350            *tempcode == OP_NOTEXACT)        if (*tempcode == OP_TYPEEXACT)
4351          tempcode += _pcre_OP_lengths[*tempcode] +          tempcode += _pcre_OP_lengths[*tempcode] +
4352            ((*tempcode == OP_TYPEEXACT &&            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
4353               (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);  
4354          else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
4355            {
4356            tempcode += _pcre_OP_lengths[*tempcode];
4357    #ifdef SUPPORT_UTF8
4358            if (utf8 && tempcode[-1] >= 0xc0)
4359              tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
4360    #endif
4361            }
4362    
4363        len = code - tempcode;        len = code - tempcode;
4364        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4365          {          {
# Line 4201  we set the flag only if there is a liter Line 4405  we set the flag only if there is a liter
4405      lookbehind or option setting or condition or all the other extended      lookbehind or option setting or condition or all the other extended
4406      parenthesis forms.  */      parenthesis forms.  */
4407    
4408      case '(':      case CHAR_LEFT_PARENTHESIS:
4409      newoptions = options;      newoptions = options;
4410      skipbytes = 0;      skipbytes = 0;
4411      bravalue = OP_CBRA;      bravalue = OP_CBRA;
# Line 4210  we set the flag only if there is a liter Line 4414  we set the flag only if there is a liter
4414    
4415      /* First deal with various "verbs" that can be introduced by '*'. */      /* First deal with various "verbs" that can be introduced by '*'. */
4416    
4417      if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)      if (*(++ptr) == CHAR_ASTERISK && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
4418        {        {
4419        int i, namelen;        int i, namelen;
4420        const char *vn = verbnames;        const char *vn = verbnames;
4421        const uschar *name = ++ptr;        const uschar *name = ++ptr;
4422        previous = NULL;        previous = NULL;
4423        while ((cd->ctypes[*++ptr] & ctype_letter) != 0);        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
4424        if (*ptr == ':')        if (*ptr == CHAR_COLON)
4425          {          {
4426          *errorcodeptr = ERR59;   /* Not supported */          *errorcodeptr = ERR59;   /* Not supported */
4427          goto FAILED;          goto FAILED;
4428          }          }
4429        if (*ptr != ')')        if (*ptr != CHAR_RIGHT_PARENTHESIS)
4430          {          {
4431          *errorcodeptr = ERR60;          *errorcodeptr = ERR60;
4432          goto FAILED;          goto FAILED;
# Line 4247  we set the flag only if there is a liter Line 4451  we set the flag only if there is a liter
4451      /* Deal with the extended parentheses; all are introduced by '?', and the      /* Deal with the extended parentheses; all are introduced by '?', and the
4452      appearance of any of them means that this is not a capturing group. */      appearance of any of them means that this is not a capturing group. */
4453    
4454      else if (*ptr == '?')      else if (*ptr == CHAR_QUESTION_MARK)
4455        {        {
4456        int i, set, unset, namelen;        int i, set, unset, namelen;
4457        int *optset;        int *optset;
# Line 4256  we set the flag only if there is a liter Line 4460  we set the flag only if there is a liter
4460    
4461        switch (*(++ptr))        switch (*(++ptr))
4462          {          {
4463          case '#':                 /* Comment; skip to ket */          case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */
4464          ptr++;          ptr++;
4465          while (*ptr != 0 && *ptr != ')') ptr++;          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
4466          if (*ptr == 0)          if (*ptr == 0)
4467            {            {
4468            *errorcodeptr = ERR18;            *errorcodeptr = ERR18;
# Line 4268  we set the flag only if there is a liter Line 4472  we set the flag only if there is a liter
4472    
4473    
4474          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4475          case '|':                 /* Reset capture count for each branch */          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
4476          reset_bracount = TRUE;          reset_bracount = TRUE;
4477          /* Fall through */          /* Fall through */
4478    
4479          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4480          case ':':                 /* Non-capturing bracket */          case CHAR_COLON:          /* Non-capturing bracket */
4481          bravalue = OP_BRA;          bravalue = OP_BRA;
4482          ptr++;          ptr++;
4483          break;          break;
4484    
4485    
4486          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4487          case '(':          case CHAR_LEFT_PARENTHESIS:
4488          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
4489    
4490          /* A condition can be an assertion, a number (referring to a numbered          /* A condition can be an assertion, a number (referring to a numbered
# Line 4300  we set the flag only if there is a liter Line 4504  we set the flag only if there is a liter
4504          the switch. This will take control down to where bracketed groups,          the switch. This will take control down to where bracketed groups,
4505          including assertions, are processed. */          including assertions, are processed. */
4506    
4507          if (ptr[1] == '?' && (ptr[2] == '=' || ptr[2] == '!' || ptr[2] == '<'))          if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
4508                ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
4509            break;            break;
4510    
4511          /* Most other conditions use OP_CREF (a couple change to OP_RREF          /* Most other conditions use OP_CREF (a couple change to OP_RREF
# Line 4312  we set the flag only if there is a liter Line 4517  we set the flag only if there is a liter
4517    
4518          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
4519    
4520          if (ptr[1] == 'R' && ptr[2] == '&')          if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND)
4521            {            {
4522            terminator = -1;            terminator = -1;
4523            ptr += 2;            ptr += 2;
# Line 4322  we set the flag only if there is a liter Line 4527  we set the flag only if there is a liter
4527          /* Check for a test for a named group's having been set, using the Perl          /* Check for a test for a named group's having been set, using the Perl
4528          syntax (?(<name>) or (?('name') */          syntax (?(<name>) or (?('name') */
4529    
4530          else if (ptr[1] == '<')          else if (ptr[1] == CHAR_LESS_THAN_SIGN)
4531            {            {
4532            terminator = '>';            terminator = CHAR_GREATER_THAN_SIGN;
4533            ptr++;            ptr++;
4534            }            }
4535          else if (ptr[1] == '\'')          else if (ptr[1] == CHAR_APOSTROPHE)
4536            {            {
4537            terminator = '\'';            terminator = CHAR_APOSTROPHE;
4538            ptr++;            ptr++;
4539            }            }
4540          else          else
4541            {            {
4542            terminator = 0;            terminator = 0;
4543            if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
4544            }            }
4545    
4546          /* We now expect to read a name; anything else is an error */          /* We now expect to read a name; anything else is an error */
# Line 4355  we set the flag only if there is a liter Line 4560  we set the flag only if there is a liter
4560            {            {
4561            if (recno >= 0)            if (recno >= 0)
4562              recno = ((digitab[*ptr] & ctype_digit) != 0)?              recno = ((digitab[*ptr] & ctype_digit) != 0)?
4563                recno * 10 + *ptr - '0' : -1;                recno * 10 + *ptr - CHAR_0 : -1;
4564            ptr++;            ptr++;
4565            }            }
4566          namelen = ptr - name;          namelen = ptr - name;
4567    
4568          if ((terminator > 0 && *ptr++ != terminator) || *ptr++ != ')')          if ((terminator > 0 && *ptr++ != terminator) ||
4569                *ptr++ != CHAR_RIGHT_PARENTHESIS)
4570            {            {
4571            ptr--;      /* Error offset */            ptr--;      /* Error offset */
4572            *errorcodeptr = ERR26;            *errorcodeptr = ERR26;
# Line 4382  we set the flag only if there is a liter Line 4588  we set the flag only if there is a liter
4588              *errorcodeptr = ERR58;              *errorcodeptr = ERR58;
4589              goto FAILED;              goto FAILED;
4590              }              }
4591            recno = (refsign == '-')?            recno = (refsign == CHAR_MINUS)?
4592              cd->bracount - recno + 1 : recno +cd->bracount;              cd->bracount - recno + 1 : recno +cd->bracount;
4593            if (recno <= 0 || recno > cd->final_bracount)            if (recno <= 0 || recno > cd->final_bracount)
4594              {              {
# Line 4413  we set the flag only if there is a liter Line 4619  we set the flag only if there is a liter
4619    
4620          /* Search the pattern for a forward reference */          /* Search the pattern for a forward reference */
4621    
4622          else if ((i = find_parens(ptr, cd, name, namelen,          else if ((i = find_parens(cd, name, namelen,
4623                          (options & PCRE_EXTENDED) != 0)) > 0)                          (options & PCRE_EXTENDED) != 0)) > 0)
4624            {            {
4625            PUT2(code, 2+LINK_SIZE, i);            PUT2(code, 2+LINK_SIZE, i);
# Line 4434  we set the flag only if there is a liter Line 4640  we set the flag only if there is a liter
4640          /* Check for (?(R) for recursion. Allow digits after R to specify a          /* Check for (?(R) for recursion. Allow digits after R to specify a
4641          specific group number. */          specific group number. */
4642    
4643          else if (*name == 'R')          else if (*name == CHAR_R)
4644            {            {
4645            recno = 0;            recno = 0;
4646            for (i = 1; i < namelen; i++)            for (i = 1; i < namelen; i++)
# Line 4444  we set the flag only if there is a liter Line 4650  we set the flag only if there is a liter
4650                *errorcodeptr = ERR15;                *errorcodeptr = ERR15;
4651                goto FAILED;                goto FAILED;
4652                }                }
4653              recno = recno * 10 + name[i] - '0';              recno = recno * 10 + name[i] - CHAR_0;
4654              }              }
4655            if (recno == 0) recno = RREF_ANY;            if (recno == 0) recno = RREF_ANY;
4656            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */
# Line 4454  we set the flag only if there is a liter Line 4660  we set the flag only if there is a liter
4660          /* Similarly, check for the (?(DEFINE) "condition", which is always          /* Similarly, check for the (?(DEFINE) "condition", which is always
4661          false. */          false. */
4662    
4663          else if (namelen == 6 && strncmp((char *)name, "DEFINE", 6) == 0)          else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)
4664            {            {
4665            code[1+LINK_SIZE] = OP_DEF;            code[1+LINK_SIZE] = OP_DEF;
4666            skipbytes = 1;            skipbytes = 1;
# Line 4479  we set the flag only if there is a liter Line 4685  we set the flag only if there is a liter
4685    
4686    
4687          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4688          case '=':                 /* Positive lookahead */          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
4689          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
4690          ptr++;          ptr++;
4691          break;          break;
4692    
4693    
4694          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4695          case '!':                 /* Negative lookahead */          case CHAR_EXCLAMATION_MARK:            /* Negative lookahead */
4696          ptr++;          ptr++;
4697          if (*ptr == ')')          /* Optimize (?!) */          if (*ptr == CHAR_RIGHT_PARENTHESIS)    /* Optimize (?!) */
4698            {            {
4699            *code++ = OP_FAIL;            *code++ = OP_FAIL;
4700            previous = NULL;            previous = NULL;
# Line 4499  we set the flag only if there is a liter Line 4705  we set the flag only if there is a liter
4705    
4706    
4707          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4708          case '<':                 /* Lookbehind or named define */          case CHAR_LESS_THAN_SIGN:              /* Lookbehind or named define */
4709          switch (ptr[1])          switch (ptr[1])
4710            {            {
4711            case '=':               /* Positive lookbehind */            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
4712            bravalue = OP_ASSERTBACK;            bravalue = OP_ASSERTBACK;
4713            ptr += 2;            ptr += 2;
4714            break;            break;
4715    
4716            case '!':               /* Negative lookbehind */            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
4717            bravalue = OP_ASSERTBACK_NOT;            bravalue = OP_ASSERTBACK_NOT;
4718            ptr += 2;            ptr += 2;
4719            break;            break;
# Line 4522  we set the flag only if there is a liter Line 4728  we set the flag only if there is a liter
4728    
4729    
4730          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4731          case '>':                 /* One-time brackets */          case CHAR_GREATER_THAN_SIGN:           /* One-time brackets */
4732          bravalue = OP_ONCE;          bravalue = OP_ONCE;
4733          ptr++;          ptr++;
4734          break;          break;
4735    
4736    
4737          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4738          case 'C':                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
4739          previous_callout = code;  /* Save for later completion */          previous_callout = code;  /* Save for later completion */
4740          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1; /* Skip one item before completing */
4741          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
4742            {            {
4743            int n = 0;            int n = 0;
4744            while ((digitab[*(++ptr)] & ctype_digit) != 0)            while ((digitab[*(++ptr)] & ctype_digit) != 0)
4745              n = n * 10 + *ptr - '0';              n = n * 10 + *ptr - CHAR_0;
4746            if (*ptr != ')')            if (*ptr != CHAR_RIGHT_PARENTHESIS)
4747              {              {
4748              *errorcodeptr = ERR39;              *errorcodeptr = ERR39;
4749              goto FAILED;              goto FAILED;
# Line 4557  we set the flag only if there is a liter Line 4763  we set the flag only if there is a liter
4763    
4764    
4765          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4766          case 'P':                 /* Python-style named subpattern handling */          case CHAR_P:              /* Python-style named subpattern handling */
4767          if (*(++ptr) == '=' || *ptr == '>')  /* Reference or recursion */          if (*(++ptr) == CHAR_EQUALS_SIGN ||
4768                *ptr == CHAR_GREATER_THAN_SIGN)  /* Reference or recursion */
4769            {            {
4770            is_recurse = *ptr == '>';            is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
4771            terminator = ')';            terminator = CHAR_RIGHT_PARENTHESIS;
4772            goto NAMED_REF_OR_RECURSE;            goto NAMED_REF_OR_RECURSE;
4773            }            }
4774          else if (*ptr != '<')    /* Test for Python-style definition */          else if (*ptr != CHAR_LESS_THAN_SIGN)  /* Test for Python-style defn */
4775            {            {
4776            *errorcodeptr = ERR41;            *errorcodeptr = ERR41;
4777            goto FAILED;            goto FAILED;
# Line 4574  we set the flag only if there is a liter Line 4781  we set the flag only if there is a liter
4781    
4782          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4783          DEFINE_NAME:    /* Come here from (?< handling */          DEFINE_NAME:    /* Come here from (?< handling */
4784          case '\'':          case CHAR_APOSTROPHE:
4785            {            {
4786            terminator = (*ptr == '<')? '>' : '\'';            terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
4787                CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
4788            name = ++ptr;            name = ++ptr;
4789    
4790            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
# Line 4650  we set the flag only if there is a liter Line 4858  we set the flag only if there is a liter
4858    
4859    
4860          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4861          case '&':                 /* Perl recursion/subroutine syntax */          case CHAR_AMPERSAND:            /* Perl recursion/subroutine syntax */
4862          terminator = ')';          terminator = CHAR_RIGHT_PARENTHESIS;
4863          is_recurse = TRUE;          is_recurse = TRUE;
4864          /* Fall through */          /* Fall through */
4865    
# Line 4710  we set the flag only if there is a liter Line 4918  we set the flag only if there is a liter
4918              recno = GET2(slot, 0);              recno = GET2(slot, 0);
4919              }              }
4920            else if ((recno =                /* Forward back reference */            else if ((recno =                /* Forward back reference */
4921                      find_parens(ptr, cd, name, namelen,                      find_parens(cd, name, namelen,
4922                        (options & PCRE_EXTENDED) != 0)) <= 0)                        (options & PCRE_EXTENDED) != 0)) <= 0)
4923              {              {
4924              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
# Line 4726  we set the flag only if there is a liter Line 4934  we set the flag only if there is a liter
4934    
4935    
4936          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4937          case 'R':                 /* Recursion */          case CHAR_R:              /* Recursion */
4938          ptr++;                    /* Same as (?0)      */          ptr++;                    /* Same as (?0)      */
4939          /* Fall through */          /* Fall through */
4940    
4941    
4942          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4943          case '-': case '+':          case CHAR_MINUS: case CHAR_PLUS:  /* Recursion or subroutine */
4944          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
4945          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
4946            {            {
4947            const uschar *called;            const uschar *called;
4948            terminator = ')';            terminator = CHAR_RIGHT_PARENTHESIS;
4949    
4950            /* Come here from the \g<...> and \g'...' code (Oniguruma            /* Come here from the \g<...> and \g'...' code (Oniguruma
4951            compatibility). However, the syntax has been checked to ensure that            compatibility). However, the syntax has been checked to ensure that
# Line 4747  we set the flag only if there is a liter Line 4955  we set the flag only if there is a liter
4955    
4956            HANDLE_NUMERICAL_RECURSION:            HANDLE_NUMERICAL_RECURSION:
4957    
4958            if ((refsign = *ptr) == '+')            if ((refsign = *ptr) == CHAR_PLUS)
4959              {              {
4960              ptr++;              ptr++;
4961              if ((digitab[*ptr] & ctype_digit) == 0)              if ((digitab[*ptr] & ctype_digit) == 0)
# Line 4756  we set the flag only if there is a liter Line 4964  we set the flag only if there is a liter
4964                goto FAILED;                goto FAILED;
4965                }                }
4966              }              }
4967            else if (refsign == '-')            else if (refsign == CHAR_MINUS)
4968              {              {
4969              if ((digitab[ptr[1]] & ctype_digit) == 0)              if ((digitab[ptr[1]] & ctype_digit) == 0)
4970                goto OTHER_CHAR_AFTER_QUERY;                goto OTHER_CHAR_AFTER_QUERY;
# Line 4765  we set the flag only if there is a liter Line 4973  we set the flag only if there is a liter
4973    
4974            recno = 0;            recno = 0;
4975            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4976              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - CHAR_0;
4977    
4978            if (*ptr != terminator)            if (*ptr != terminator)
4979              {              {
# Line 4773  we set the flag only if there is a liter Line 4981  we set the flag only if there is a liter
4981              goto FAILED;              goto FAILED;
4982              }              }
4983    
4984            if (refsign == '-')            if (refsign == CHAR_MINUS)
4985              {              {
4986              if (recno == 0)              if (recno == 0)
4987                {                {
# Line 4787  we set the flag only if there is a liter Line 4995  we set the flag only if there is a liter
4995                goto FAILED;                goto FAILED;
4996                }                }
4997              }              }
4998            else if (refsign == '+')            else if (refsign == CHAR_PLUS)
4999              {              {
5000              if (recno == 0)              if (recno == 0)
5001                {                {
# Line 4820  we set the flag only if there is a liter Line 5028  we set the flag only if there is a liter
5028    
5029              if (called == NULL)              if (called == NULL)
5030                {                {
5031                if (find_parens(ptr, cd, NULL, recno,                if (find_parens(cd, NULL, recno,
5032                      (options & PCRE_EXTENDED) != 0) < 0)                      (options & PCRE_EXTENDED) != 0) < 0)
5033                  {                  {
5034                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
# Line 4873  we set the flag only if there is a liter Line 5081  we set the flag only if there is a liter
5081          set = unset = 0;          set = unset = 0;
5082          optset = &set;          optset = &set;
5083    
5084          while (*ptr != ')' && *ptr != ':')          while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
5085            {            {
5086            switch (*ptr++)            switch (*ptr++)
5087              {              {
5088              case '-': optset = &unset; break;              case CHAR_MINUS: optset = &unset; break;
5089    
5090              case 'J':    /* Record that it changed in the external options */              case CHAR_J:    /* Record that it changed in the external options */
5091              *optset |= PCRE_DUPNAMES;              *optset |= PCRE_DUPNAMES;
5092              cd->external_flags |= PCRE_JCHANGED;              cd->external_flags |= PCRE_JCHANGED;
5093              break;              break;
5094    
5095              case 'i': *optset |= PCRE_CASELESS; break;              case CHAR_i: *optset |= PCRE_CASELESS; break;
5096              case 'm': *optset |= PCRE_MULTILINE; break;              case CHAR_m: *optset |= PCRE_MULTILINE; break;
5097              case 's': *optset |= PCRE_DOTALL; break;              case CHAR_s: *optset |= PCRE_DOTALL; break;
5098              case 'x': *optset |= PCRE_EXTENDED; break;              case CHAR_x: *optset |= PCRE_EXTENDED; break;
5099              case 'U': *optset |= PCRE_UNGREEDY; break;              case CHAR_U: *optset |= PCRE_UNGREEDY; break;
5100              case 'X': *optset |= PCRE_EXTRA; break;              case CHAR_X: *optset |= PCRE_EXTRA; break;
5101    
5102              default:  *errorcodeptr = ERR12;              default:  *errorcodeptr = ERR12;
5103                        ptr--;    /* Correct the offset */                        ptr--;    /* Correct the offset */
# Line 4920  we set the flag only if there is a liter Line 5128  we set the flag only if there is a liter
5128          both phases.          both phases.
5129    
5130          If we are not at the pattern start, compile code to change the ims          If we are not at the pattern start, compile code to change the ims
5131          options if this setting actually changes any of them, and reset the          options if this setting actually changes any of them, and reset the
5132          greedy defaults and the case value for firstbyte and reqbyte. */          greedy defaults and the case value for firstbyte and reqbyte. */
5133    
5134          if (*ptr == ')')          if (*ptr == CHAR_RIGHT_PARENTHESIS)
5135            {            {
5136            if (code == cd->start_code + 1 + LINK_SIZE &&            if (code == cd->start_code + 1 + LINK_SIZE &&
5137                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
# Line 4944  we set the flag only if there is a liter Line 5152  we set the flag only if there is a liter
5152    
5153            /* Change options at this level, and pass them back for use            /* Change options at this level, and pass them back for use
5154            in subsequent branches. When not at the start of the pattern, this            in subsequent branches. When not at the start of the pattern, this
5155            information is also necessary so that a resetting item can be            information is also necessary so that a resetting item can be
5156            compiled at the end of a group (if we are in a group). */            compiled at the end of a group (if we are in a group). */
5157    
5158            *optionsptr = options = newoptions;            *optionsptr = options = newoptions;
# Line 5063  we set the flag only if there is a liter Line 5271  we set the flag only if there is a liter
5271    
5272      /* Error if hit end of pattern */      /* Error if hit end of pattern */
5273    
5274      if (*ptr != ')')      if (*ptr != CHAR_RIGHT_PARENTHESIS)
5275        {        {
5276        *errorcodeptr = ERR14;        *errorcodeptr = ERR14;
5277        goto FAILED;        goto FAILED;
# Line 5161  we set the flag only if there is a liter Line 5369  we set the flag only if there is a liter
5369      We can test for values between ESC_b and ESC_Z for the latter; this may      We can test for values between ESC_b and ESC_Z for the latter; this may
5370      have to change if any new ones are ever created. */      have to change if any new ones are ever created. */
5371    
5372      case '\\':      case CHAR_BACKSLASH:
5373      tempptr = ptr;      tempptr = ptr;
5374      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);
5375      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
# Line 5170  we set the flag only if there is a liter Line 5378  we set the flag only if there is a liter
5378        {        {
5379        if (-c == ESC_Q)            /* Handle start of quoted string */        if (-c == ESC_Q)            /* Handle start of quoted string */
5380          {          {
5381          if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
5382            else inescq = TRUE;            ptr += 2;               /* avoid empty string */
5383                else inescq = TRUE;
5384          continue;          continue;
5385          }          }
5386    
# Line 5199  we set the flag only if there is a liter Line 5408  we set the flag only if there is a liter
5408          {          {
5409          const uschar *p;          const uschar *p;
5410          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
5411          terminator = (*(++ptr) == '<')? '>' : '\'';          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5412              CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
5413    
5414          /* These two statements stop the compiler from warning about possibly          /* These two statements stop the compiler from warning about possibly
5415          unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In          unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
# Line 5211  we set the flag only if there is a liter Line 5421  we set the flag only if there is a liter
5421    
5422          /* Test for a name */          /* Test for a name */
5423    
5424          if (ptr[1] != '+' && ptr[1] != '-')          if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
5425            {            {
5426            BOOL isnumber = TRUE;            BOOL isnumber = TRUE;
5427            for (p = ptr + 1; *p != 0 && *p != terminator; p++)            for (p = ptr + 1; *p != 0 && *p != terminator; p++)
# Line 5249  we set the flag only if there is a liter Line 5459  we set the flag only if there is a liter
5459        /* \k<name> or \k'name' is a back reference by name (Perl syntax).        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
5460        We also support \k{name} (.NET syntax) */        We also support \k{name} (.NET syntax) */
5461    
5462        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))        if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||
5463              ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))
5464          {          {
5465          is_recurse = FALSE;          is_recurse = FALSE;
5466          terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5467              CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
5468              CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
5469          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
5470          }          }
5471    
# Line 5355  we set the flag only if there is a liter Line 5568  we set the flag only if there is a liter
5568    
5569      /* Remember if \r or \n were seen */      /* Remember if \r or \n were seen */
5570    
5571      if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')      if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
5572        cd->external_flags |= PCRE_HASCRORLF;        cd->external_flags |= PCRE_HASCRORLF;
5573    
5574      /* Set the first and required bytes appropriately. If no previous first      /* Set the first and required bytes appropriately. If no previous first
# Line 5600  for (;;) Line 5813  for (;;)
5813    compile a resetting op-code following, except at the very end of the pattern.    compile a resetting op-code following, except at the very end of the pattern.
5814    Return leaving the pointer at the terminating char. */    Return leaving the pointer at the terminating char. */
5815    
5816    if (*ptr != '|')    if (*ptr != CHAR_VERTICAL_LINE)
5817      {      {
5818      if (lengthptr == NULL)      if (lengthptr == NULL)
5819        {        {
# Line 5623  for (;;) Line 5836  for (;;)
5836    
5837      /* Resetting option if needed */      /* Resetting option if needed */
5838    
5839      if ((options & PCRE_IMS) != oldims && *ptr == ')')      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
5840        {        {
5841        *code++ = OP_OPT;        *code++ = OP_OPT;
5842        *code++ = oldims;        *code++ = oldims;
# Line 5805  do { Line 6018  do {
6018       NULL, 0, FALSE);       NULL, 0, FALSE);
6019     register int op = *scode;     register int op = *scode;
6020    
6021       /* If we are at the start of a conditional assertion group, *both* the
6022       conditional assertion *and* what follows the condition must satisfy the test
6023       for start of line. Other kinds of condition fail. Note that there may be an
6024       auto-callout at the start of a condition. */
6025    
6026       if (op == OP_COND)
6027         {
6028         scode += 1 + LINK_SIZE;
6029         if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
6030         switch (*scode)
6031           {
6032           case OP_CREF:
6033           case OP_RREF:
6034           case OP_DEF:
6035           return FALSE;
6036    
6037           default:     /* Assertion */
6038           if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6039           do scode += GET(scode, 1); while (*scode == OP_ALT);
6040           scode += 1 + LINK_SIZE;
6041           break;
6042           }
6043         scode = first_significant_code(scode, NULL, 0, FALSE);
6044         op = *scode;
6045         }
6046    
6047     /* Non-capturing brackets */     /* Non-capturing brackets */
6048    
6049     if (op == OP_BRA)     if (op == OP_BRA)
# Line 5823  do { Line 6062  do {
6062    
6063     /* Other brackets */     /* Other brackets */
6064    
6065     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE)
6066       { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }       {
6067         if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6068         }
6069    
6070     /* .* means "start at start or after \n" if it isn't in brackets that     /* .* means "start at start or after \n" if it isn't in brackets that
6071     may be referenced. */     may be referenced. */
# Line 6003  if (erroroffset == NULL) Line 6244  if (erroroffset == NULL)
6244    
6245  *erroroffset = 0;  *erroroffset = 0;
6246    
 /* Can't support UTF8 unless PCRE has been compiled to include the code. */  
   
 #ifdef SUPPORT_UTF8  
 utf8 = (options & PCRE_UTF8) != 0;  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  
      (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)  
   {  
   errorcode = ERR44;  
   goto PCRE_EARLY_ERROR_RETURN2;  
   }  
 #else  
 if ((options & PCRE_UTF8) != 0)  
   {  
   errorcode = ERR32;  
   goto PCRE_EARLY_ERROR_RETURN;  
   }  
 #endif  
   
 if ((options & ~PUBLIC_OPTIONS) != 0)  
   {  
   errorcode = ERR17;  
   goto PCRE_EARLY_ERROR_RETURN;  
   }  
   
6247  /* Set up pointers to the individual character tables */  /* Set up pointers to the individual character tables */
6248    
6249  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = _pcre_default_tables;
# Line 6035  cd->fcc = tables + fcc_offset; Line 6252  cd->fcc = tables + fcc_offset;
6252  cd->cbits = tables + cbits_offset;  cd->cbits = tables + cbits_offset;
6253  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
6254    
6255    /* Check that all undefined public option bits are zero */
6256    
6257    if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
6258      {
6259      errorcode = ERR17;
6260      goto PCRE_EARLY_ERROR_RETURN;
6261      }
6262    
6263  /* Check for global one-time settings at the start of the pattern, and remember  /* Check for global one-time settings at the start of the pattern, and remember
6264  the offset for later. */  the offset for later. */
6265    
6266  while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
6267           ptr[skipatstart+1] == CHAR_ASTERISK)
6268    {    {
6269    int newnl = 0;    int newnl = 0;
6270    int newbsr = 0;    int newbsr = 0;
6271    
6272    if (strncmp((char *)(ptr+skipatstart+2), "CR)", 3) == 0)    if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
6273        { skipatstart += 7; options |= PCRE_UTF8; continue; }
6274    
6275      if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
6276      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
6277    else if (strncmp((char *)(ptr+skipatstart+2), "LF)", 3)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)
6278      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
6279    else if (strncmp((char *)(ptr+skipatstart+2), "CRLF)", 5)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)
6280      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
6281    else if (strncmp((char *)(ptr+skipatstart+2), "ANY)", 4) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)
6282      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
6283    else if (strncmp((char *)(ptr+skipatstart+2), "ANYCRLF)", 8)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)
6284      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
6285    
6286    else if (strncmp((char *)(ptr+skipatstart+2), "BSR_ANYCRLF)", 12) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
6287      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
6288    else if (strncmp((char *)(ptr+skipatstart+2), "BSR_UNICODE)", 12) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
6289      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
6290    
6291    if (newnl != 0)    if (newnl != 0)
# Line 6066  while (ptr[skipatstart] == '(' && ptr[sk Line 6295  while (ptr[skipatstart] == '(' && ptr[sk
6295    else break;    else break;
6296    }    }
6297    
6298    /* Can't support UTF8 unless PCRE has been compiled to include the code. */
6299    
6300    #ifdef SUPPORT_UTF8
6301    utf8 = (options & PCRE_UTF8) != 0;
6302    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
6303         (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
6304      {
6305      errorcode = ERR44;
6306      goto PCRE_EARLY_ERROR_RETURN2;
6307      }
6308    #else
6309    if ((options & PCRE_UTF8) != 0)
6310      {
6311      errorcode = ERR32;
6312      goto PCRE_EARLY_ERROR_RETURN;
6313      }
6314    #endif
6315    
6316  /* Check validity of \R options. */  /* Check validity of \R options. */
6317    
6318  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
# Line 6084  current code allows for fixed one- or tw Line 6331  current code allows for fixed one- or tw
6331  switch (options & PCRE_NEWLINE_BITS)  switch (options & PCRE_NEWLINE_BITS)
6332    {    {
6333    case 0: newline = NEWLINE; break;   /* Build-time default */    case 0: newline = NEWLINE; break;   /* Build-time default */
6334    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6335    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6336    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
6337         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6338    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
6339    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6340    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;

Legend:
Removed from v.360  
changed lines
  Added in v.426

  ViewVC Help
Powered by ViewVC 1.1.5