/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 334 by ph10, Fri Apr 11 15:48:14 2008 UTC revision 426 by ph10, Wed Aug 26 15:38:32 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 97  are simple data values; negative values Line 97  are simple data values; negative values
97  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
98  is invalid. */  is invalid. */
99    
100  #ifndef EBCDIC  /* This is the "normal" table for ASCII systems */  #ifndef EBCDIC
101    
102    /* This is the "normal" table for ASCII systems or for EBCDIC systems running
103    in UTF-8 mode. */
104    
105  static const short int escapes[] = {  static const short int escapes[] = {
106       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,                       0,
107       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,                       0,
108     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */       0,                       0,
109  -ESC_H,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */       0,                       0,
110  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0, -ESC_V, -ESC_W,   /* P - W */       0,                       0,
111  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */       CHAR_COLON,              CHAR_SEMICOLON,
112     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */       CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,
113  -ESC_h,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */       CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,
114  -ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0, -ESC_v, -ESC_w,   /* p - w */       CHAR_COMMERCIAL_AT,      -ESC_A,
115       0,      0, -ESC_z                                            /* x - z */       -ESC_B,                  -ESC_C,
116         -ESC_D,                  -ESC_E,
117         0,                       -ESC_G,
118         -ESC_H,                  0,
119         0,                       -ESC_K,
120         0,                       0,
121         0,                       0,
122         -ESC_P,                  -ESC_Q,
123         -ESC_R,                  -ESC_S,
124         0,                       0,
125         -ESC_V,                  -ESC_W,
126         -ESC_X,                  0,
127         -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
128         CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
129         CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
130         CHAR_GRAVE_ACCENT,       7,
131         -ESC_b,                  0,
132         -ESC_d,                  ESC_e,
133         ESC_f,                   0,
134         -ESC_h,                  0,
135         0,                       -ESC_k,
136         0,                       0,
137         ESC_n,                   0,
138         -ESC_p,                  0,
139         ESC_r,                   -ESC_s,
140         ESC_tee,                 0,
141         -ESC_v,                  -ESC_w,
142         0,                       0,
143         -ESC_z
144  };  };
145    
146  #else           /* This is the "abnormal" table for EBCDIC systems */  #else
147    
148    /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
149    
150  static const short int escapes[] = {  static const short int escapes[] = {
151  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
152  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
# Line 142  static const short int escapes[] = { Line 177  static const short int escapes[] = {
177    
178  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is
179  searched linearly. Put all the names into a single string, in order to reduce  searched linearly. Put all the names into a single string, in order to reduce
180  the number of relocations when a shared library is dynamically linked. */  the number of relocations when a shared library is dynamically linked. The
181    string is built from string macros so that it works in UTF-8 mode on EBCDIC
182    platforms. */
183    
184  typedef struct verbitem {  typedef struct verbitem {
185    int   len;    int   len;
# Line 150  typedef struct verbitem { Line 187  typedef struct verbitem {
187  } verbitem;  } verbitem;
188    
189  static const char verbnames[] =  static const char verbnames[] =
190    "ACCEPT\0"    STRING_ACCEPT0
191    "COMMIT\0"    STRING_COMMIT0
192    "F\0"    STRING_F0
193    "FAIL\0"    STRING_FAIL0
194    "PRUNE\0"    STRING_PRUNE0
195    "SKIP\0"    STRING_SKIP0
196    "THEN";    STRING_THEN;
197    
198  static const verbitem verbs[] = {  static const verbitem verbs[] = {
199    { 6, OP_ACCEPT },    { 6, OP_ACCEPT },
# Line 178  length entry. The first three must be al Line 215  length entry. The first three must be al
215  for handling case independence. */  for handling case independence. */
216    
217  static const char posix_names[] =  static const char posix_names[] =
218    "alpha\0"  "lower\0"  "upper\0"  "alnum\0"  "ascii\0"  "blank\0"    STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
219    "cntrl\0"  "digit\0"  "graph\0"  "print\0"  "punct\0"  "space\0"    STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
220    "word\0"   "xdigit";    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
221      STRING_word0  STRING_xdigit;
222    
223  static const uschar posix_name_lengths[] = {  static const uschar posix_name_lengths[] = {
224    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
# Line 302  static const char error_texts[] = Line 340  static const char error_texts[] =
340    "(*VERB) not recognized\0"    "(*VERB) not recognized\0"
341    "number is too big\0"    "number is too big\0"
342    "subpattern name expected\0"    "subpattern name expected\0"
343    "digit expected after (?+";    "digit expected after (?+\0"
344      "] is an invalid data character in JavaScript compatibility mode";
345    
346    
347  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 321  For convenience, we use the same bit def Line 360  For convenience, we use the same bit def
360    
361  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
362    
363  #ifndef EBCDIC  /* This is the "normal" case, for ASCII systems */  #ifndef EBCDIC
364    
365    /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
366    UTF-8 mode. */
367    
368  static const unsigned char digitab[] =  static const unsigned char digitab[] =
369    {    {
370    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
# Line 357  static const unsigned char digitab[] = Line 400  static const unsigned char digitab[] =
400    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
401    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
402    
403  #else           /* This is the "abnormal" case, for EBCDIC systems */  #else
404    
405    /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
406    
407  static const unsigned char digitab[] =  static const unsigned char digitab[] =
408    {    {
409    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
# Line 454  static const char * Line 500  static const char *
500  find_error_text(int n)  find_error_text(int n)
501  {  {
502  const char *s = error_texts;  const char *s = error_texts;
503  for (; n > 0; n--) while (*s++ != 0);  for (; n > 0; n--) while (*s++ != 0) {};
504  return s;  return s;
505  }  }
506    
# Line 502  if (c == 0) *errorcodeptr = ERR1; Line 548  if (c == 0) *errorcodeptr = ERR1;
548  in a table. A non-zero result is something that can be returned immediately.  in a table. A non-zero result is something that can be returned immediately.
549  Otherwise further processing may be required. */  Otherwise further processing may be required. */
550    
551  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
552  else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */  else if (c < CHAR_0 || c > CHAR_z) {}                     /* Not alphanumeric */
553  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - CHAR_0]) != 0) c = i;
554    
555  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
556  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
# Line 523  else Line 569  else
569      /* A number of Perl escapes are not handled by PCRE. We give an explicit      /* A number of Perl escapes are not handled by PCRE. We give an explicit
570      error. */      error. */
571    
572      case 'l':      case CHAR_l:
573      case 'L':      case CHAR_L:
574      case 'N':      case CHAR_N:
575      case 'u':      case CHAR_u:
576      case 'U':      case CHAR_U:
577      *errorcodeptr = ERR37;      *errorcodeptr = ERR37;
578      break;      break;
579    
580      /* \g must be followed by one of a number of specific things:      /* \g must be followed by one of a number of specific things:
581    
582      (1) A number, either plain or braced. If positive, it is an absolute      (1) A number, either plain or braced. If positive, it is an absolute
583      backreference. If negative, it is a relative backreference. This is a Perl      backreference. If negative, it is a relative backreference. This is a Perl
584      5.10 feature.      5.10 feature.
585    
586      (2) Perl 5.10 also supports \g{name} as a reference to a named group. This      (2) Perl 5.10 also supports \g{name} as a reference to a named group. This
587      is part of Perl's movement towards a unified syntax for back references. As      is part of Perl's movement towards a unified syntax for back references. As
588      this is synonymous with \k{name}, we fudge it up by pretending it really      this is synonymous with \k{name}, we fudge it up by pretending it really
589      was \k.      was \k.
590    
591      (3) For Oniguruma compatibility we also support \g followed by a name or a      (3) For Oniguruma compatibility we also support \g followed by a name or a
592      number either in angle brackets or in single quotes. However, these are      number either in angle brackets or in single quotes. However, these are
593      (possibly recursive) subroutine calls, _not_ backreferences. Just return      (possibly recursive) subroutine calls, _not_ backreferences. Just return
594      the -ESC_g code (cf \k). */      the -ESC_g code (cf \k). */
595    
596      case 'g':      case CHAR_g:
597      if (ptr[1] == '<' || ptr[1] == '\'')      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
598        {        {
599        c = -ESC_g;        c = -ESC_g;
600        break;        break;
601        }        }
602    
603      /* Handle the Perl-compatible cases */      /* Handle the Perl-compatible cases */
604    
605      if (ptr[1] == '{')      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
606        {        {
607        const uschar *p;        const uschar *p;
608        for (p = ptr+2; *p != 0 && *p != '}'; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
609          if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
610        if (*p != 0 && *p != '}')        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
611          {          {
612          c = -ESC_k;          c = -ESC_k;
613          break;          break;
# Line 571  else Line 617  else
617        }        }
618      else braced = FALSE;      else braced = FALSE;
619    
620      if (ptr[1] == '-')      if (ptr[1] == CHAR_MINUS)
621        {        {
622        negated = TRUE;        negated = TRUE;
623        ptr++;        ptr++;
# Line 580  else Line 626  else
626    
627      c = 0;      c = 0;
628      while ((digitab[ptr[1]] & ctype_digit) != 0)      while ((digitab[ptr[1]] & ctype_digit) != 0)
629        c = c * 10 + *(++ptr) - '0';        c = c * 10 + *(++ptr) - CHAR_0;
630    
631      if (c < 0)   /* Integer overflow */      if (c < 0)   /* Integer overflow */
632        {        {
633        *errorcodeptr = ERR61;        *errorcodeptr = ERR61;
634        break;        break;
635        }        }
636    
637      if (braced && *(++ptr) != '}')      if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
638        {        {
639        *errorcodeptr = ERR57;        *errorcodeptr = ERR57;
640        break;        break;
641        }        }
642    
643      if (c == 0)      if (c == 0)
644        {        {
645        *errorcodeptr = ERR58;        *errorcodeptr = ERR58;
646        break;        break;
647        }        }
648    
649      if (negated)      if (negated)
650        {        {
# Line 625  else Line 671  else
671      value is greater than 377, the least significant 8 bits are taken. Inside a      value is greater than 377, the least significant 8 bits are taken. Inside a
672      character class, \ followed by a digit is always an octal number. */      character class, \ followed by a digit is always an octal number. */
673    
674      case '1': case '2': case '3': case '4': case '5':      case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
675      case '6': case '7': case '8': case '9':      case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
676    
677      if (!isclass)      if (!isclass)
678        {        {
679        oldptr = ptr;        oldptr = ptr;
680        c -= '0';        c -= CHAR_0;
681        while ((digitab[ptr[1]] & ctype_digit) != 0)        while ((digitab[ptr[1]] & ctype_digit) != 0)
682          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - CHAR_0;
683        if (c < 0)    /* Integer overflow */        if (c < 0)    /* Integer overflow */
684          {          {
685          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
# Line 651  else Line 697  else
697      generates a binary zero byte and treats the digit as a following literal.      generates a binary zero byte and treats the digit as a following literal.
698      Thus we have to pull back the pointer by one. */      Thus we have to pull back the pointer by one. */
699    
700      if ((c = *ptr) >= '8')      if ((c = *ptr) >= CHAR_8)
701        {        {
702        ptr--;        ptr--;
703        c = 0;        c = 0;
# Line 664  else Line 710  else
710      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
711      than 3 octal digits. */      than 3 octal digits. */
712    
713      case '0':      case CHAR_0:
714      c -= '0';      c -= CHAR_0;
715      while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')      while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
716          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - CHAR_0;
717      if (!utf8 && c > 255) *errorcodeptr = ERR51;      if (!utf8 && c > 255) *errorcodeptr = ERR51;
718      break;      break;
719    
# Line 675  else Line 721  else
721      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is
722      treated as a data character. */      treated as a data character. */
723    
724      case 'x':      case CHAR_x:
725      if (ptr[1] == '{')      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
726        {        {
727        const uschar *pt = ptr + 2;        const uschar *pt = ptr + 2;
728        int count = 0;        int count = 0;
# Line 685  else Line 731  else
731        while ((digitab[*pt] & ctype_xdigit) != 0)        while ((digitab[*pt] & ctype_xdigit) != 0)
732          {          {
733          register int cc = *pt++;          register int cc = *pt++;
734          if (c == 0 && cc == '0') continue;     /* Leading zeroes */          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
735          count++;          count++;
736    
737  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
738          if (cc >= 'a') cc -= 32;               /* Convert to upper case */          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
739          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
740  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
741          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
742          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
743  #endif  #endif
744          }          }
745    
746        if (*pt == '}')        if (*pt == CHAR_RIGHT_CURLY_BRACKET)
747          {          {
748          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
749          ptr = pt;          ptr = pt;
# Line 713  else Line 759  else
759      c = 0;      c = 0;
760      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
761        {        {
762        int cc;                               /* Some compilers don't like ++ */        int cc;                                  /* Some compilers don't like */
763        cc = *(++ptr);                        /* in initializers */        cc = *(++ptr);                           /* ++ in initializers */
764  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
765        if (cc >= 'a') cc -= 32;              /* Convert to upper case */        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
766        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
767  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
768        if (cc <= 'z') cc += 64;              /* Convert to upper case */        if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */
769        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
770  #endif  #endif
771        }        }
772      break;      break;
# Line 729  else Line 775  else
775      This coding is ASCII-specific, but then the whole concept of \cx is      This coding is ASCII-specific, but then the whole concept of \cx is
776      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
777    
778      case 'c':      case CHAR_c:
779      c = *(++ptr);      c = *(++ptr);
780      if (c == 0)      if (c == 0)
781        {        {
# Line 737  else Line 783  else
783        break;        break;
784        }        }
785    
786  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
787      if (c >= 'a' && c <= 'z') c -= 32;      if (c >= CHAR_a && c <= CHAR_z) c -= 32;
788      c ^= 0x40;      c ^= 0x40;
789  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
790      if (c >= 'a' && c <= 'z') c += 64;      if (c >= CHAR_a && c <= CHAR_z) c += 64;
791      c ^= 0xC0;      c ^= 0xC0;
792  #endif  #endif
793      break;      break;
# Line 803  if (c == 0) goto ERROR_RETURN; Line 849  if (c == 0) goto ERROR_RETURN;
849  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for
850  negation. */  negation. */
851    
852  if (c == '{')  if (c == CHAR_LEFT_CURLY_BRACKET)
853    {    {
854    if (ptr[1] == '^')    if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
855      {      {
856      *negptr = TRUE;      *negptr = TRUE;
857      ptr++;      ptr++;
# Line 814  if (c == '{') Line 860  if (c == '{')
860      {      {
861      c = *(++ptr);      c = *(++ptr);
862      if (c == 0) goto ERROR_RETURN;      if (c == 0) goto ERROR_RETURN;
863      if (c == '}') break;      if (c == CHAR_RIGHT_CURLY_BRACKET) break;
864      name[i] = c;      name[i] = c;
865      }      }
866    if (c !='}') goto ERROR_RETURN;    if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
867    name[i] = 0;    name[i] = 0;
868    }    }
869    
# Line 882  is_counted_repeat(const uschar *p) Line 928  is_counted_repeat(const uschar *p)
928  {  {
929  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
930  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
931  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
932    
933  if (*p++ != ',') return FALSE;  if (*p++ != CHAR_COMMA) return FALSE;
934  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
935    
936  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
937  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
938    
939  return (*p == '}');  return (*p == CHAR_RIGHT_CURLY_BRACKET);
940  }  }
941    
942    
# Line 923  int max = -1; Line 969  int max = -1;
969  /* Read the minimum value and do a paranoid check: a negative value indicates  /* Read the minimum value and do a paranoid check: a negative value indicates
970  an integer overflow. */  an integer overflow. */
971    
972  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;
973  if (min < 0 || min > 65535)  if (min < 0 || min > 65535)
974    {    {
975    *errorcodeptr = ERR5;    *errorcodeptr = ERR5;
# Line 933  if (min < 0 || min > 65535) Line 979  if (min < 0 || min > 65535)
979  /* Read the maximum value if there is one, and again do a paranoid on its size.  /* Read the maximum value if there is one, and again do a paranoid on its size.
980  Also, max must not be less than min. */  Also, max must not be less than min. */
981    
982  if (*p == '}') max = min; else  if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
983    {    {
984    if (*(++p) != '}')    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
985      {      {
986      max = 0;      max = 0;
987      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;
988      if (max < 0 || max > 65535)      if (max < 0 || max > 65535)
989        {        {
990        *errorcodeptr = ERR5;        *errorcodeptr = ERR5;
# Line 963  return p; Line 1009  return p;
1009    
1010    
1011  /*************************************************  /*************************************************
1012  *       Find forward referenced subpattern       *  *  Subroutine for finding forward reference      *
1013  *************************************************/  *************************************************/
1014    
1015  /* This function scans along a pattern's text looking for capturing  /* This recursive function is called only from find_parens() below. The
1016    top-level call starts at the beginning of the pattern. All other calls must
1017    start at a parenthesis. It scans along a pattern's text looking for capturing
1018  subpatterns, and counting them. If it finds a named pattern that matches the  subpatterns, and counting them. If it finds a named pattern that matches the
1019  name it is given, it returns its number. Alternatively, if the name is NULL, it  name it is given, it returns its number. Alternatively, if the name is NULL, it
1020  returns when it reaches a given numbered subpattern. This is used for forward  returns when it reaches a given numbered subpattern. We know that if (?P< is
1021  references to subpatterns. We know that if (?P< is encountered, the name will  encountered, the name will be terminated by '>' because that is checked in the
1022  be terminated by '>' because that is checked in the first pass.  first pass. Recursion is used to keep track of subpatterns that reset the
1023    capturing group numbers - the (?| feature.
1024    
1025  Arguments:  Arguments:
1026    ptr          current position in the pattern    ptrptr       address of the current character pointer (updated)
1027    count        current count of capturing parens so far encountered    cd           compile background data
1028    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1029    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1030    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1031      count        pointer to the current capturing subpattern number (updated)
1032    
1033  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
1034  */  */
1035    
1036  static int  static int
1037  find_parens(const uschar *ptr, int count, const uschar *name, int lorn,  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
1038    BOOL xmode)    BOOL xmode, int *count)
1039  {  {
1040  const uschar *thisname;  uschar *ptr = *ptrptr;
1041    int start_count = *count;
1042    int hwm_count = start_count;
1043    BOOL dup_parens = FALSE;
1044    
1045  for (; *ptr != 0; ptr++)  /* If the first character is a parenthesis, check on the type of group we are
1046    dealing with. The very first call may not start with a parenthesis. */
1047    
1048    if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1049    {    {
1050    int term;    if (ptr[1] == CHAR_QUESTION_MARK &&
1051          ptr[2] == CHAR_VERTICAL_LINE)
1052        {
1053        ptr += 3;
1054        dup_parens = TRUE;
1055        }
1056    
1057      /* Handle a normal, unnamed capturing parenthesis */
1058    
1059      else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
1060        {
1061        *count += 1;
1062        if (name == NULL && *count == lorn) return *count;
1063        ptr++;
1064        }
1065    
1066      /* Handle a condition. If it is an assertion, just carry on so that it
1067      is processed as normal. If not, skip to the closing parenthesis of the
1068      condition (there can't be any nested parens). */
1069    
1070      else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
1071        {
1072        ptr += 2;
1073        if (ptr[1] != CHAR_QUESTION_MARK)
1074          {
1075          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1076          if (*ptr != 0) ptr++;
1077          }
1078        }
1079    
1080      /* We have either (? or (* and not a condition */
1081    
1082      else
1083        {
1084        ptr += 2;
1085        if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */
1086    
1087        /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
1088    
1089        if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1090            ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1091          {
1092          int term;
1093          const uschar *thisname;
1094          *count += 1;
1095          if (name == NULL && *count == lorn) return *count;
1096          term = *ptr++;
1097          if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
1098          thisname = ptr;
1099          while (*ptr != term) ptr++;
1100          if (name != NULL && lorn == ptr - thisname &&
1101              strncmp((const char *)name, (const char *)thisname, lorn) == 0)
1102            return *count;
1103          }
1104        }
1105      }
1106    
1107    /* Past any initial parenthesis handling, scan for parentheses or vertical
1108    bars. */
1109    
1110    for (; *ptr != 0; ptr++)
1111      {
1112    /* Skip over backslashed characters and also entire \Q...\E */    /* Skip over backslashed characters and also entire \Q...\E */
1113    
1114    if (*ptr == '\\')    if (*ptr == CHAR_BACKSLASH)
1115      {      {
1116      if (*(++ptr) == 0) return -1;      if (*(++ptr) == 0) goto FAIL_EXIT;
1117      if (*ptr == 'Q') for (;;)      if (*ptr == CHAR_Q) for (;;)
1118        {        {
1119        while (*(++ptr) != 0 && *ptr != '\\');        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1120        if (*ptr == 0) return -1;        if (*ptr == 0) goto FAIL_EXIT;
1121        if (*(++ptr) == 'E') break;        if (*(++ptr) == CHAR_E) break;
1122        }        }
1123      continue;      continue;
1124      }      }
1125    
1126    /* Skip over character classes */    /* Skip over character classes; this logic must be similar to the way they
1127      are handled for real. If the first character is '^', skip it. Also, if the
1128      first few characters (either before or after ^) are \Q\E or \E we skip them
1129      too. This makes for compatibility with Perl. Note the use of STR macros to
1130      encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */
1131    
1132    if (*ptr == '[')    if (*ptr == CHAR_LEFT_SQUARE_BRACKET)
1133      {      {
1134      while (*(++ptr) != ']')      BOOL negate_class = FALSE;
1135        for (;;)
1136          {
1137          int c = *(++ptr);
1138          if (c == CHAR_BACKSLASH)
1139            {
1140            if (ptr[1] == CHAR_E)
1141              ptr++;
1142            else if (strncmp((const char *)ptr+1,
1143                     STR_Q STR_BACKSLASH STR_E, 3) == 0)
1144              ptr += 3;
1145            else
1146              break;
1147            }
1148          else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
1149            negate_class = TRUE;
1150          else break;
1151          }
1152    
1153        /* If the next character is ']', it is a data character that must be
1154        skipped, except in JavaScript compatibility mode. */
1155    
1156        if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&
1157            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
1158          ptr++;
1159    
1160        while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
1161        {        {
1162        if (*ptr == 0) return -1;        if (*ptr == 0) return -1;
1163        if (*ptr == '\\')        if (*ptr == CHAR_BACKSLASH)
1164          {          {
1165          if (*(++ptr) == 0) return -1;          if (*(++ptr) == 0) goto FAIL_EXIT;
1166          if (*ptr == 'Q') for (;;)          if (*ptr == CHAR_Q) for (;;)
1167            {            {
1168            while (*(++ptr) != 0 && *ptr != '\\');            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1169            if (*ptr == 0) return -1;            if (*ptr == 0) goto FAIL_EXIT;
1170            if (*(++ptr) == 'E') break;            if (*(++ptr) == CHAR_E) break;
1171            }            }
1172          continue;          continue;
1173          }          }
# Line 1031  for (; *ptr != 0; ptr++) Line 1177  for (; *ptr != 0; ptr++)
1177    
1178    /* Skip comments in /x mode */    /* Skip comments in /x mode */
1179    
1180    if (xmode && *ptr == '#')    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1181      {      {
1182      while (*(++ptr) != 0 && *ptr != '\n');      while (*(++ptr) != 0 && *ptr != CHAR_NL) {};
1183      if (*ptr == 0) return -1;      if (*ptr == 0) goto FAIL_EXIT;
1184      continue;      continue;
1185      }      }
1186    
1187    /* An opening parens must now be a real metacharacter */    /* Check for the special metacharacters */
1188    
1189    if (*ptr != '(') continue;    if (*ptr == CHAR_LEFT_PARENTHESIS)
   if (ptr[1] != '?' && ptr[1] != '*')  
1190      {      {
1191      count++;      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
1192      if (name == NULL && count == lorn) return count;      if (rc > 0) return rc;
1193      continue;      if (*ptr == 0) goto FAIL_EXIT;
1194        }
1195    
1196      else if (*ptr == CHAR_RIGHT_PARENTHESIS)
1197        {
1198        if (dup_parens && *count < hwm_count) *count = hwm_count;
1199        *ptrptr = ptr;
1200        return -1;
1201        }
1202    
1203      else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
1204        {
1205        if (*count > hwm_count) hwm_count = *count;
1206        *count = start_count;
1207      }      }
1208      }
1209    
1210    FAIL_EXIT:
1211    *ptrptr = ptr;
1212    return -1;
1213    }
1214    
   ptr += 2;  
   if (*ptr == 'P') ptr++;                      /* Allow optional P */  
1215    
   /* We have to disambiguate (?<! and (?<= from (?<name> */  
1216    
   if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&  
        *ptr != '\'')  
     continue;  
1217    
1218    count++;  /*************************************************
1219    *       Find forward referenced subpattern       *
1220    *************************************************/
1221    
1222    /* This function scans along a pattern's text looking for capturing
1223    subpatterns, and counting them. If it finds a named pattern that matches the
1224    name it is given, it returns its number. Alternatively, if the name is NULL, it
1225    returns when it reaches a given numbered subpattern. This is used for forward
1226    references to subpatterns. We used to be able to start this scan from the
1227    current compiling point, using the current count value from cd->bracount, and
1228    do it all in a single loop, but the addition of the possibility of duplicate
1229    subpattern numbers means that we have to scan from the very start, in order to
1230    take account of such duplicates, and to use a recursive function to keep track
1231    of the different types of group.
1232    
1233    if (name == NULL && count == lorn) return count;  Arguments:
1234    term = *ptr++;    cd           compile background data
1235    if (term == '<') term = '>';    name         name to seek, or NULL if seeking a numbered subpattern
1236    thisname = ptr;    lorn         name length, or subpattern number if name is NULL
1237    while (*ptr != term) ptr++;    xmode        TRUE if we are in /x mode
1238    if (name != NULL && lorn == ptr - thisname &&  
1239        strncmp((const char *)name, (const char *)thisname, lorn) == 0)  Returns:       the number of the found subpattern, or -1 if not found
1240      return count;  */
1241    
1242    static int
1243    find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)
1244    {
1245    uschar *ptr = (uschar *)cd->start_pattern;
1246    int count = 0;
1247    int rc;
1248    
1249    /* If the pattern does not start with an opening parenthesis, the first call
1250    to find_parens_sub() will scan right to the end (if necessary). However, if it
1251    does start with a parenthesis, find_parens_sub() will return when it hits the
1252    matching closing parens. That is why we have to have a loop. */
1253    
1254    for (;;)
1255      {
1256      rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
1257      if (rc > 0 || *ptr++ == 0) break;
1258    }    }
1259    
1260  return -1;  return rc;
1261  }  }
1262    
1263    
1264    
1265    
1266  /*************************************************  /*************************************************
1267  *      Find first significant op code            *  *      Find first significant op code            *
1268  *************************************************/  *************************************************/
# Line 1234  for (;;) Line 1423  for (;;)
1423      branchlength++;      branchlength++;
1424      cc += 2;      cc += 2;
1425  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1426      if ((options & PCRE_UTF8) != 0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1427        {        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
       while ((*cc & 0xc0) == 0x80) cc++;  
       }  
1428  #endif  #endif
1429      break;      break;
1430    
# Line 1248  for (;;) Line 1435  for (;;)
1435      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1436      cc += 4;      cc += 4;
1437  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1438      if ((options & PCRE_UTF8) != 0)      if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1439        {        cc += _pcre_utf8_table4[cc[-1] & 0x3f];
       while((*cc & 0x80) == 0x80) cc++;  
       }  
1440  #endif  #endif
1441      break;      break;
1442    
# Line 1275  for (;;) Line 1460  for (;;)
1460      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1461      case OP_WORDCHAR:      case OP_WORDCHAR:
1462      case OP_ANY:      case OP_ANY:
1463        case OP_ALLANY:
1464      branchlength++;      branchlength++;
1465      cc++;      cc++;
1466      break;      break;
# Line 1423  for (;;) Line 1609  for (;;)
1609        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1610        break;        break;
1611        }        }
1612    #else
1613        (void)(utf8);  /* Keep compiler happy by referencing function argument */
1614  #endif  #endif
1615      }      }
1616    }    }
# Line 1516  for (;;) Line 1704  for (;;)
1704        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1705        break;        break;
1706        }        }
1707    #else
1708        (void)(utf8);  /* Keep compiler happy by referencing function argument */
1709  #endif  #endif
1710      }      }
1711    }    }
# Line 1567  for (code = first_significant_code(code Line 1757  for (code = first_significant_code(code
1757    
1758    /* Groups with zero repeats can of course be empty; skip them. */    /* Groups with zero repeats can of course be empty; skip them. */
1759    
1760    if (c == OP_BRAZERO || c == OP_BRAMINZERO)    if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO)
1761      {      {
1762      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1763      do code += GET(code, 1); while (*code == OP_ALT);      do code += GET(code, 1); while (*code == OP_ALT);
# Line 1582  for (code = first_significant_code(code Line 1772  for (code = first_significant_code(code
1772      BOOL empty_branch;      BOOL empty_branch;
1773      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
1774    
1775      /* Scan a closed bracket */      /* If a conditional group has only one branch, there is a second, implied,
1776        empty branch, so just skip over the conditional, because it could be empty.
1777        Otherwise, scan the individual branches of the group. */
1778    
1779      empty_branch = FALSE;      if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
     do  
       {  
       if (!empty_branch && could_be_empty_branch(code, endcode, utf8))  
         empty_branch = TRUE;  
1780        code += GET(code, 1);        code += GET(code, 1);
1781        else
1782          {
1783          empty_branch = FALSE;
1784          do
1785            {
1786            if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
1787              empty_branch = TRUE;
1788            code += GET(code, 1);
1789            }
1790          while (*code == OP_ALT);
1791          if (!empty_branch) return FALSE;   /* All branches are non-empty */
1792        }        }
1793      while (*code == OP_ALT);  
     if (!empty_branch) return FALSE;   /* All branches are non-empty */  
1794      c = *code;      c = *code;
1795      continue;      continue;
1796      }      }
# Line 1653  for (code = first_significant_code(code Line 1851  for (code = first_significant_code(code
1851      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1852      case OP_WORDCHAR:      case OP_WORDCHAR:
1853      case OP_ANY:      case OP_ANY:
1854        case OP_ALLANY:
1855      case OP_ANYBYTE:      case OP_ANYBYTE:
1856      case OP_CHAR:      case OP_CHAR:
1857      case OP_CHARNC:      case OP_CHARNC:
# Line 1709  for (code = first_significant_code(code Line 1908  for (code = first_significant_code(code
1908      case OP_QUERY:      case OP_QUERY:
1909      case OP_MINQUERY:      case OP_MINQUERY:
1910      case OP_POSQUERY:      case OP_POSQUERY:
1911        if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
1912        break;
1913    
1914      case OP_UPTO:      case OP_UPTO:
1915      case OP_MINUPTO:      case OP_MINUPTO:
1916      case OP_POSUPTO:      case OP_POSUPTO:
1917      if (utf8) while ((code[2] & 0xc0) == 0x80) code++;      if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
1918      break;      break;
1919  #endif  #endif
1920      }      }
# Line 1793  int terminator;          /* Don't combin Line 1995  int terminator;          /* Don't combin
1995  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
1996  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != 0; ptr++)
1997    {    {
1998    if (*ptr == '\\' && ptr[1] == ']') ptr++; else    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else
1999      {      {
2000      if (*ptr == ']') return FALSE;      if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
2001      if (*ptr == terminator && ptr[1] == ']')      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2002        {        {
2003        *endptr = ptr;        *endptr = ptr;
2004        return TRUE;        return TRUE;
# Line 1847  return -1; Line 2049  return -1;
2049  that is referenced. This means that groups can be replicated for fixed  that is referenced. This means that groups can be replicated for fixed
2050  repetition simply by copying (because the recursion is allowed to refer to  repetition simply by copying (because the recursion is allowed to refer to
2051  earlier groups that are outside the current group). However, when a group is  earlier groups that are outside the current group). However, when a group is
2052  optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before  optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is
2053  it, after it has been compiled. This means that any OP_RECURSE items within it  inserted before it, after it has been compiled. This means that any OP_RECURSE
2054  that refer to the group itself or any contained groups have to have their  items within it that refer to the group itself or any contained groups have to
2055  offsets adjusted. That is one of the jobs of this function. Before it is called,  have their offsets adjusted. That is one of the jobs of this function. Before it
2056  the partially compiled regex must be temporarily terminated with OP_END.  is called, the partially compiled regex must be temporarily terminated with
2057    OP_END.
2058    
2059  This function has been extended with the possibility of forward references for  This function has been extended with the possibility of forward references for
2060  recursions and subroutine calls. It must also check the list of such references  recursions and subroutine calls. It must also check the list of such references
# Line 1882  while ((ptr = (uschar *)find_recurse(ptr Line 2085  while ((ptr = (uschar *)find_recurse(ptr
2085    
2086    /* See if this recursion is on the forward reference list. If so, adjust the    /* See if this recursion is on the forward reference list. If so, adjust the
2087    reference. */    reference. */
2088    
2089    for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)    for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)
2090      {      {
2091      offset = GET(hc, 0);      offset = GET(hc, 0);
# Line 1986  get_othercase_range(unsigned int *cptr, Line 2189  get_othercase_range(unsigned int *cptr,
2189  unsigned int c, othercase, next;  unsigned int c, othercase, next;
2190    
2191  for (c = *cptr; c <= d; c++)  for (c = *cptr; c <= d; c++)
2192    { if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }    { if ((othercase = UCD_OTHERCASE(c)) != c) break; }
2193    
2194  if (c > d) return FALSE;  if (c > d) return FALSE;
2195    
# Line 1995  next = othercase + 1; Line 2198  next = othercase + 1;
2198    
2199  for (++c; c <= d; c++)  for (++c; c <= d; c++)
2200    {    {
2201    if (_pcre_ucp_othercase(c) != next) break;    if (UCD_OTHERCASE(c) != next) break;
2202    next++;    next++;
2203    }    }
2204    
# Line 2041  if ((options & PCRE_EXTENDED) != 0) Line 2244  if ((options & PCRE_EXTENDED) != 0)
2244    for (;;)    for (;;)
2245      {      {
2246      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2247      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2248        {        {
2249        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2250          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 2053  if ((options & PCRE_EXTENDED) != 0) Line 2256  if ((options & PCRE_EXTENDED) != 0)
2256  /* If the next item is one that we can handle, get its value. A non-negative  /* If the next item is one that we can handle, get its value. A non-negative
2257  value is a character, a negative value is an escape value. */  value is a character, a negative value is an escape value. */
2258    
2259  if (*ptr == '\\')  if (*ptr == CHAR_BACKSLASH)
2260    {    {
2261    int temperrorcode = 0;    int temperrorcode = 0;
2262    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);
# Line 2078  if ((options & PCRE_EXTENDED) != 0) Line 2281  if ((options & PCRE_EXTENDED) != 0)
2281    for (;;)    for (;;)
2282      {      {
2283      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2284      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2285        {        {
2286        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2287          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 2089  if ((options & PCRE_EXTENDED) != 0) Line 2292  if ((options & PCRE_EXTENDED) != 0)
2292    
2293  /* If the next thing is itself optional, we have to give up. */  /* If the next thing is itself optional, we have to give up. */
2294    
2295  if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2296    return FALSE;    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2297        return FALSE;
2298    
2299  /* Now compare the next item with the previous opcode. If the previous is a  /* Now compare the next item with the previous opcode. If the previous is a
2300  positive single character match, "item" either contains the character or, if  positive single character match, "item" either contains the character or, if
# Line 2105  if (next >= 0) switch(op_code) Line 2309  if (next >= 0) switch(op_code)
2309    case OP_CHAR:    case OP_CHAR:
2310  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2311    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
2312    #else
2313      (void)(utf8_char);  /* Keep compiler happy by referencing function argument */
2314  #endif  #endif
2315    return item != next;    return item != next;
2316    
# Line 2123  if (next >= 0) switch(op_code) Line 2329  if (next >= 0) switch(op_code)
2329      unsigned int othercase;      unsigned int othercase;
2330      if (next < 128) othercase = cd->fcc[next]; else      if (next < 128) othercase = cd->fcc[next]; else
2331  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2332      othercase = _pcre_ucp_othercase((unsigned int)next);      othercase = UCD_OTHERCASE((unsigned int)next);
2333  #else  #else
2334      othercase = NOTACHAR;      othercase = NOTACHAR;
2335  #endif  #endif
# Line 2144  if (next >= 0) switch(op_code) Line 2350  if (next >= 0) switch(op_code)
2350      unsigned int othercase;      unsigned int othercase;
2351      if (next < 128) othercase = cd->fcc[next]; else      if (next < 128) othercase = cd->fcc[next]; else
2352  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2353      othercase = _pcre_ucp_othercase(next);      othercase = UCD_OTHERCASE(next);
2354  #else  #else
2355      othercase = NOTACHAR;      othercase = NOTACHAR;
2356  #endif  #endif
# Line 2458  for (;; ptr++) Line 2664  for (;; ptr++)
2664    /* Get next byte in the pattern */    /* Get next byte in the pattern */
2665    
2666    c = *ptr;    c = *ptr;
2667    
2668    /* If we are in the pre-compile phase, accumulate the length used for the    /* If we are in the pre-compile phase, accumulate the length used for the
2669    previous cycle of this loop. */    previous cycle of this loop. */
2670    
# Line 2526  for (;; ptr++) Line 2732  for (;; ptr++)
2732    
2733    if (inescq && c != 0)    if (inescq && c != 0)
2734      {      {
2735      if (c == '\\' && ptr[1] == 'E')      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
2736        {        {
2737        inescq = FALSE;        inescq = FALSE;
2738        ptr++;        ptr++;
# Line 2552  for (;; ptr++) Line 2758  for (;; ptr++)
2758    /* Fill in length of a previous callout, except when the next thing is    /* Fill in length of a previous callout, except when the next thing is
2759    a quantifier. */    a quantifier. */
2760    
2761    is_quantifier = c == '*' || c == '+' || c == '?' ||    is_quantifier =
2762      (c == '{' && is_counted_repeat(ptr+1));      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
2763        (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
2764    
2765    if (!is_quantifier && previous_callout != NULL &&    if (!is_quantifier && previous_callout != NULL &&
2766         after_manual_callout-- <= 0)         after_manual_callout-- <= 0)
# Line 2568  for (;; ptr++) Line 2775  for (;; ptr++)
2775    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
2776      {      {
2777      if ((cd->ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
2778      if (c == '#')      if (c == CHAR_NUMBER_SIGN)
2779        {        {
2780        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2781          {          {
# Line 2593  for (;; ptr++) Line 2800  for (;; ptr++)
2800      {      {
2801      /* ===================================================================*/      /* ===================================================================*/
2802      case 0:                        /* The branch terminates at string end */      case 0:                        /* The branch terminates at string end */
2803      case '|':                      /* or | or ) */      case CHAR_VERTICAL_LINE:       /* or | or ) */
2804      case ')':      case CHAR_RIGHT_PARENTHESIS:
2805      *firstbyteptr = firstbyte;      *firstbyteptr = firstbyte;
2806      *reqbyteptr = reqbyte;      *reqbyteptr = reqbyte;
2807      *codeptr = code;      *codeptr = code;
# Line 2616  for (;; ptr++) Line 2823  for (;; ptr++)
2823      /* Handle single-character metacharacters. In multiline mode, ^ disables      /* Handle single-character metacharacters. In multiline mode, ^ disables
2824      the setting of any following char as a first character. */      the setting of any following char as a first character. */
2825    
2826      case '^':      case CHAR_CIRCUMFLEX_ACCENT:
2827      if ((options & PCRE_MULTILINE) != 0)      if ((options & PCRE_MULTILINE) != 0)
2828        {        {
2829        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
# Line 2625  for (;; ptr++) Line 2832  for (;; ptr++)
2832      *code++ = OP_CIRC;      *code++ = OP_CIRC;
2833      break;      break;
2834    
2835      case '$':      case CHAR_DOLLAR_SIGN:
2836      previous = NULL;      previous = NULL;
2837      *code++ = OP_DOLL;      *code++ = OP_DOLL;
2838      break;      break;
# Line 2633  for (;; ptr++) Line 2840  for (;; ptr++)
2840      /* There can never be a first char if '.' is first, whatever happens about      /* There can never be a first char if '.' is first, whatever happens about
2841      repeats. The value of reqbyte doesn't change either. */      repeats. The value of reqbyte doesn't change either. */
2842    
2843      case '.':      case CHAR_DOT:
2844      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
2845      zerofirstbyte = firstbyte;      zerofirstbyte = firstbyte;
2846      zeroreqbyte = reqbyte;      zeroreqbyte = reqbyte;
2847      previous = code;      previous = code;
2848      *code++ = OP_ANY;      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
2849      break;      break;
2850    
2851    
# Line 2653  for (;; ptr++) Line 2860  for (;; ptr++)
2860      opcode is compiled. It may optionally have a bit map for characters < 256,      opcode is compiled. It may optionally have a bit map for characters < 256,
2861      but those above are explicitly listed afterwards. A flag byte tells      but those above are explicitly listed afterwards. A flag byte tells
2862      whether the bitmap is present, and whether this is a negated class or not.      whether the bitmap is present, and whether this is a negated class or not.
     */  
2863    
2864      case '[':      In JavaScript compatibility mode, an isolated ']' causes an error. In
2865        default (Perl) mode, it is treated as a data character. */
2866    
2867        case CHAR_RIGHT_SQUARE_BRACKET:
2868        if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2869          {
2870          *errorcodeptr = ERR64;
2871          goto FAILED;
2872          }
2873        goto NORMAL_CHAR;
2874    
2875        case CHAR_LEFT_SQUARE_BRACKET:
2876      previous = code;      previous = code;
2877    
2878      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
2879      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
2880    
2881      if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&      if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
2882             ptr[1] == CHAR_EQUALS_SIGN) &&
2883          check_posix_syntax(ptr, &tempptr))          check_posix_syntax(ptr, &tempptr))
2884        {        {
2885        *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;        *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
2886        goto FAILED;        goto FAILED;
2887        }        }
2888    
# Line 2676  for (;; ptr++) Line 2894  for (;; ptr++)
2894      for (;;)      for (;;)
2895        {        {
2896        c = *(++ptr);        c = *(++ptr);
2897        if (c == '\\')        if (c == CHAR_BACKSLASH)
2898          {          {
2899          if (ptr[1] == 'E') ptr++;          if (ptr[1] == CHAR_E)
2900            else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;            ptr++;
2901              else break;          else if (strncmp((const char *)ptr+1,
2902                              STR_Q STR_BACKSLASH STR_E, 3) == 0)
2903              ptr += 3;
2904            else
2905              break;
2906          }          }
2907        else if (!negate_class && c == '^')        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
2908          negate_class = TRUE;          negate_class = TRUE;
2909        else break;        else break;
2910        }        }
2911    
2912        /* Empty classes are allowed in JavaScript compatibility mode. Otherwise,
2913        an initial ']' is taken as a data character -- the code below handles
2914        that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
2915        [^] must match any character, so generate OP_ALLANY. */
2916    
2917        if (c == CHAR_RIGHT_SQUARE_BRACKET &&
2918            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2919          {
2920          *code++ = negate_class? OP_ALLANY : OP_FAIL;
2921          if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
2922          zerofirstbyte = firstbyte;
2923          break;
2924          }
2925    
2926      /* If a class contains a negative special such as \S, we need to flip the      /* If a class contains a negative special such as \S, we need to flip the
2927      negation flag at the end, so that support for characters > 255 works      negation flag at the end, so that support for characters > 255 works
2928      correctly (they are all included in the class). */      correctly (they are all included in the class). */
# Line 2744  for (;; ptr++) Line 2980  for (;; ptr++)
2980    
2981        if (inescq)        if (inescq)
2982          {          {
2983          if (c == '\\' && ptr[1] == 'E')     /* If we are at \E */          if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)  /* If we are at \E */
2984            {            {
2985            inescq = FALSE;                   /* Reset literal state */            inescq = FALSE;                   /* Reset literal state */
2986            ptr++;                            /* Skip the 'E' */            ptr++;                            /* Skip the 'E' */
# Line 2759  for (;; ptr++) Line 2995  for (;; ptr++)
2995        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
2996        5.6 and 5.8 do. */        5.6 and 5.8 do. */
2997    
2998        if (c == '[' &&        if (c == CHAR_LEFT_SQUARE_BRACKET &&
2999            (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&            (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
3000            check_posix_syntax(ptr, &tempptr))             ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
3001          {          {
3002          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3003          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3004          register const uschar *cbits = cd->cbits;          register const uschar *cbits = cd->cbits;
3005          uschar pbits[32];          uschar pbits[32];
3006    
3007          if (ptr[1] != ':')          if (ptr[1] != CHAR_COLON)
3008            {            {
3009            *errorcodeptr = ERR31;            *errorcodeptr = ERR31;
3010            goto FAILED;            goto FAILED;
3011            }            }
3012    
3013          ptr += 2;          ptr += 2;
3014          if (*ptr == '^')          if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
3015            {            {
3016            local_negate = TRUE;            local_negate = TRUE;
3017            should_flip_negation = TRUE;  /* Note negative special */            should_flip_negation = TRUE;  /* Note negative special */
# Line 2848  for (;; ptr++) Line 3084  for (;; ptr++)
3084        to 'or' into the one we are building. We assume they have more than one        to 'or' into the one we are building. We assume they have more than one
3085        character in them, so set class_charcount bigger than one. */        character in them, so set class_charcount bigger than one. */
3086    
3087        if (c == '\\')        if (c == CHAR_BACKSLASH)
3088          {          {
3089          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3090          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
3091    
3092          if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */          if (-c == ESC_b) c = CHAR_BS;       /* \b is backspace in a class */
3093          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */          else if (-c == ESC_X) c = CHAR_X;   /* \X is literal X in a class */
3094          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */          else if (-c == ESC_R) c = CHAR_R;   /* \R is literal R in a class */
3095          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
3096            {            {
3097            if (ptr[1] == '\\' && ptr[2] == 'E')            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3098              {              {
3099              ptr += 2; /* avoid empty string */              ptr += 2; /* avoid empty string */
3100              }              }
# Line 3084  for (;; ptr++) Line 3320  for (;; ptr++)
3320        entirely. The code for handling \Q and \E is messy. */        entirely. The code for handling \Q and \E is messy. */
3321    
3322        CHECK_RANGE:        CHECK_RANGE:
3323        while (ptr[1] == '\\' && ptr[2] == 'E')        while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3324          {          {
3325          inescq = FALSE;          inescq = FALSE;
3326          ptr += 2;          ptr += 2;
# Line 3094  for (;; ptr++) Line 3330  for (;; ptr++)
3330    
3331        /* Remember \r or \n */        /* Remember \r or \n */
3332    
3333        if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF;        if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3334    
3335        /* Check for range */        /* Check for range */
3336    
3337        if (!inescq && ptr[1] == '-')        if (!inescq && ptr[1] == CHAR_MINUS)
3338          {          {
3339          int d;          int d;
3340          ptr += 2;          ptr += 2;
3341          while (*ptr == '\\' && ptr[1] == 'E') ptr += 2;          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
3342    
3343          /* If we hit \Q (not followed by \E) at this point, go into escaped          /* If we hit \Q (not followed by \E) at this point, go into escaped
3344          mode. */          mode. */
3345    
3346          while (*ptr == '\\' && ptr[1] == 'Q')          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
3347            {            {
3348            ptr += 2;            ptr += 2;
3349            if (*ptr == '\\' && ptr[1] == 'E') { ptr += 2; continue; }            if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
3350                { ptr += 2; continue; }
3351            inescq = TRUE;            inescq = TRUE;
3352            break;            break;
3353            }            }
3354    
3355          if (*ptr == 0 || (!inescq && *ptr == ']'))          if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
3356            {            {
3357            ptr = oldptr;            ptr = oldptr;
3358            goto LONE_SINGLE_CHARACTER;            goto LONE_SINGLE_CHARACTER;
# Line 3134  for (;; ptr++) Line 3371  for (;; ptr++)
3371          not any of the other escapes. Perl 5.6 treats a hyphen as a literal          not any of the other escapes. Perl 5.6 treats a hyphen as a literal
3372          in such circumstances. */          in such circumstances. */
3373    
3374          if (!inescq && d == '\\')          if (!inescq && d == CHAR_BACKSLASH)
3375            {            {
3376            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3377            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
# Line 3144  for (;; ptr++) Line 3381  for (;; ptr++)
3381    
3382            if (d < 0)            if (d < 0)
3383              {              {
3384              if (d == -ESC_b) d = '\b';              if (d == -ESC_b) d = CHAR_BS;
3385              else if (d == -ESC_X) d = 'X';              else if (d == -ESC_X) d = CHAR_X;
3386              else if (d == -ESC_R) d = 'R'; else              else if (d == -ESC_R) d = CHAR_R; else
3387                {                {
3388                ptr = oldptr;                ptr = oldptr;
3389                goto LONE_SINGLE_CHARACTER;  /* A few lines below */                goto LONE_SINGLE_CHARACTER;  /* A few lines below */
# Line 3167  for (;; ptr++) Line 3404  for (;; ptr++)
3404    
3405          /* Remember \r or \n */          /* Remember \r or \n */
3406    
3407          if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF;          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3408    
3409          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
3410          matching, we have to use an XCLASS with extra data items. Caseless          matching, we have to use an XCLASS with extra data items. Caseless
# Line 3287  for (;; ptr++) Line 3524  for (;; ptr++)
3524          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
3525            {            {
3526            unsigned int othercase;            unsigned int othercase;
3527            if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)            if ((othercase = UCD_OTHERCASE(c)) != c)
3528              {              {
3529              *class_utf8data++ = XCL_SINGLE;              *class_utf8data++ = XCL_SINGLE;
3530              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
# Line 3314  for (;; ptr++) Line 3551  for (;; ptr++)
3551    
3552      /* Loop until ']' reached. This "while" is the end of the "do" above. */      /* Loop until ']' reached. This "while" is the end of the "do" above. */
3553    
3554      while ((c = *(++ptr)) != 0 && (c != ']' || inescq));      while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
3555    
3556      if (c == 0)                          /* Missing terminating ']' */      if (c == 0)                          /* Missing terminating ']' */
3557        {        {
# Line 3459  we set the flag only if there is a liter Line 3696  we set the flag only if there is a liter
3696      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
3697      has been tested above. */      has been tested above. */
3698    
3699      case '{':      case CHAR_LEFT_CURLY_BRACKET:
3700      if (!is_quantifier) goto NORMAL_CHAR;      if (!is_quantifier) goto NORMAL_CHAR;
3701      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
3702      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
3703      goto REPEAT;      goto REPEAT;
3704    
3705      case '*':      case CHAR_ASTERISK:
3706      repeat_min = 0;      repeat_min = 0;
3707      repeat_max = -1;      repeat_max = -1;
3708      goto REPEAT;      goto REPEAT;
3709    
3710      case '+':      case CHAR_PLUS:
3711      repeat_min = 1;      repeat_min = 1;
3712      repeat_max = -1;      repeat_max = -1;
3713      goto REPEAT;      goto REPEAT;
3714    
3715      case '?':      case CHAR_QUESTION_MARK:
3716      repeat_min = 0;      repeat_min = 0;
3717      repeat_max = 1;      repeat_max = 1;
3718    
# Line 3510  we set the flag only if there is a liter Line 3747  we set the flag only if there is a liter
3747      but if PCRE_UNGREEDY is set, it works the other way round. We change the      but if PCRE_UNGREEDY is set, it works the other way round. We change the
3748      repeat type to the non-default. */      repeat type to the non-default. */
3749    
3750      if (ptr[1] == '+')      if (ptr[1] == CHAR_PLUS)
3751        {        {
3752        repeat_type = 0;                  /* Force greedy */        repeat_type = 0;                  /* Force greedy */
3753        possessive_quantifier = TRUE;        possessive_quantifier = TRUE;
3754        ptr++;        ptr++;
3755        }        }
3756      else if (ptr[1] == '?')      else if (ptr[1] == CHAR_QUESTION_MARK)
3757        {        {
3758        repeat_type = greedy_non_default;        repeat_type = greedy_non_default;
3759        ptr++;        ptr++;
# Line 3631  we set the flag only if there is a liter Line 3868  we set the flag only if there is a liter
3868    
3869        if (repeat_max == 0) goto END_REPEAT;        if (repeat_max == 0) goto END_REPEAT;
3870    
3871          /*--------------------------------------------------------------------*/
3872          /* This code is obsolete from release 8.00; the restriction was finally
3873          removed: */
3874    
3875        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
3876        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
3877    
3878        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
3879          /*--------------------------------------------------------------------*/
3880    
3881        /* Combine the op_type with the repeat_type */        /* Combine the op_type with the repeat_type */
3882    
# Line 3781  we set the flag only if there is a liter Line 4023  we set the flag only if there is a liter
4023          goto END_REPEAT;          goto END_REPEAT;
4024          }          }
4025    
4026          /*--------------------------------------------------------------------*/
4027          /* This code is obsolete from release 8.00; the restriction was finally
4028          removed: */
4029    
4030        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
4031        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
4032    
4033        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;        /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
4034          /*--------------------------------------------------------------------*/
4035    
4036        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
4037          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 3842  we set the flag only if there is a liter Line 4089  we set the flag only if there is a liter
4089    
4090        if (repeat_min == 0)        if (repeat_min == 0)
4091          {          {
4092          /* If the maximum is also zero, we just omit the group from the output          /* If the maximum is also zero, we used to just omit the group from the
4093          altogether. */          output altogether, like this:
4094    
4095          if (repeat_max == 0)          ** if (repeat_max == 0)
4096            {          **   {
4097            code = previous;          **   code = previous;
4098            goto END_REPEAT;          **   goto END_REPEAT;
4099            }          **   }
4100    
4101            However, that fails when a group is referenced as a subroutine from
4102            elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it
4103            so that it is skipped on execution. As we don't have a list of which
4104            groups are referenced, we cannot do this selectively.
4105    
4106            If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
4107            and do no more at this point. However, we do need to adjust any
4108            OP_RECURSE calls inside the group that refer to the group itself or any
4109            internal or forward referenced group, because the offset is from the
4110            start of the whole regex. Temporarily terminate the pattern while doing
4111            this. */
4112    
4113          /* If the maximum is 1 or unlimited, we just have to stick in the          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
         BRAZERO and do no more at this point. However, we do need to adjust  
         any OP_RECURSE calls inside the group that refer to the group itself or  
         any internal or forward referenced group, because the offset is from  
         the start of the whole regex. Temporarily terminate the pattern while  
         doing this. */  
   
         if (repeat_max <= 1)  
4114            {            {
4115            *code = OP_END;            *code = OP_END;
4116            adjust_recurse(previous, 1, utf8, cd, save_hwm);            adjust_recurse(previous, 1, utf8, cd, save_hwm);
4117            memmove(previous+1, previous, len);            memmove(previous+1, previous, len);
4118            code++;            code++;
4119              if (repeat_max == 0)
4120                {
4121                *previous++ = OP_SKIPZERO;
4122                goto END_REPEAT;
4123                }
4124            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
4125            }            }
4126    
# Line 4058  we set the flag only if there is a liter Line 4315  we set the flag only if there is a liter
4315          }          }
4316        }        }
4317    
4318        /* If previous is OP_FAIL, it was generated by an empty class [] in
4319        JavaScript mode. The other ways in which OP_FAIL can be generated, that is
4320        by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat"
4321        error above. We can just ignore the repeat in JS case. */
4322    
4323        else if (*previous == OP_FAIL) goto END_REPEAT;
4324    
4325      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
4326    
4327      else      else
# Line 4082  we set the flag only if there is a liter Line 4346  we set the flag only if there is a liter
4346      if (possessive_quantifier)      if (possessive_quantifier)
4347        {        {
4348        int len;        int len;
4349        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||  
4350            *tempcode == OP_NOTEXACT)        if (*tempcode == OP_TYPEEXACT)
4351          tempcode += _pcre_OP_lengths[*tempcode] +          tempcode += _pcre_OP_lengths[*tempcode] +
4352            ((*tempcode == OP_TYPEEXACT &&            ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
4353               (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);  
4354          else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
4355            {
4356            tempcode += _pcre_OP_lengths[*tempcode];
4357    #ifdef SUPPORT_UTF8
4358            if (utf8 && tempcode[-1] >= 0xc0)
4359              tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
4360    #endif
4361            }
4362    
4363        len = code - tempcode;        len = code - tempcode;
4364        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4365          {          {
# Line 4132  we set the flag only if there is a liter Line 4405  we set the flag only if there is a liter
4405      lookbehind or option setting or condition or all the other extended      lookbehind or option setting or condition or all the other extended
4406      parenthesis forms.  */      parenthesis forms.  */
4407    
4408      case '(':      case CHAR_LEFT_PARENTHESIS:
4409      newoptions = options;      newoptions = options;
4410      skipbytes = 0;      skipbytes = 0;
4411      bravalue = OP_CBRA;      bravalue = OP_CBRA;
4412      save_hwm = cd->hwm;      save_hwm = cd->hwm;
4413      reset_bracount = FALSE;      reset_bracount = FALSE;
4414    
4415      /* First deal with various "verbs" that can be introduced by '*'. */      /* First deal with various "verbs" that can be introduced by '*'. */
4416    
4417      if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)      if (*(++ptr) == CHAR_ASTERISK && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
4418        {        {
4419        int i, namelen;        int i, namelen;
4420        const char *vn = verbnames;        const char *vn = verbnames;
4421        const uschar *name = ++ptr;        const uschar *name = ++ptr;
4422        previous = NULL;        previous = NULL;
4423        while ((cd->ctypes[*++ptr] & ctype_letter) != 0);        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
4424        if (*ptr == ':')        if (*ptr == CHAR_COLON)
4425          {          {
4426          *errorcodeptr = ERR59;   /* Not supported */          *errorcodeptr = ERR59;   /* Not supported */
4427          goto FAILED;          goto FAILED;
4428          }          }
4429        if (*ptr != ')')        if (*ptr != CHAR_RIGHT_PARENTHESIS)
4430          {          {
4431          *errorcodeptr = ERR60;          *errorcodeptr = ERR60;
4432          goto FAILED;          goto FAILED;
# Line 4178  we set the flag only if there is a liter Line 4451  we set the flag only if there is a liter
4451      /* Deal with the extended parentheses; all are introduced by '?', and the      /* Deal with the extended parentheses; all are introduced by '?', and the
4452      appearance of any of them means that this is not a capturing group. */      appearance of any of them means that this is not a capturing group. */
4453    
4454      else if (*ptr == '?')      else if (*ptr == CHAR_QUESTION_MARK)
4455        {        {
4456        int i, set, unset, namelen;        int i, set, unset, namelen;
4457        int *optset;        int *optset;
# Line 4187  we set the flag only if there is a liter Line 4460  we set the flag only if there is a liter
4460    
4461        switch (*(++ptr))        switch (*(++ptr))
4462          {          {
4463          case '#':                 /* Comment; skip to ket */          case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */
4464          ptr++;          ptr++;
4465          while (*ptr != 0 && *ptr != ')') ptr++;          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
4466          if (*ptr == 0)          if (*ptr == 0)
4467            {            {
4468            *errorcodeptr = ERR18;            *errorcodeptr = ERR18;
# Line 4199  we set the flag only if there is a liter Line 4472  we set the flag only if there is a liter
4472    
4473    
4474          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4475          case '|':                 /* Reset capture count for each branch */          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
4476          reset_bracount = TRUE;          reset_bracount = TRUE;
4477          /* Fall through */          /* Fall through */
4478    
4479          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4480          case ':':                 /* Non-capturing bracket */          case CHAR_COLON:          /* Non-capturing bracket */
4481          bravalue = OP_BRA;          bravalue = OP_BRA;
4482          ptr++;          ptr++;
4483          break;          break;
4484    
4485    
4486          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4487          case '(':          case CHAR_LEFT_PARENTHESIS:
4488          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
4489    
4490          /* A condition can be an assertion, a number (referring to a numbered          /* A condition can be an assertion, a number (referring to a numbered
# Line 4231  we set the flag only if there is a liter Line 4504  we set the flag only if there is a liter
4504          the switch. This will take control down to where bracketed groups,          the switch. This will take control down to where bracketed groups,
4505          including assertions, are processed. */          including assertions, are processed. */
4506    
4507          if (ptr[1] == '?' && (ptr[2] == '=' || ptr[2] == '!' || ptr[2] == '<'))          if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
4508                ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
4509            break;            break;
4510    
4511          /* Most other conditions use OP_CREF (a couple change to OP_RREF          /* Most other conditions use OP_CREF (a couple change to OP_RREF
# Line 4243  we set the flag only if there is a liter Line 4517  we set the flag only if there is a liter
4517    
4518          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
4519    
4520          if (ptr[1] == 'R' && ptr[2] == '&')          if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND)
4521            {            {
4522            terminator = -1;            terminator = -1;
4523            ptr += 2;            ptr += 2;
# Line 4253  we set the flag only if there is a liter Line 4527  we set the flag only if there is a liter
4527          /* Check for a test for a named group's having been set, using the Perl          /* Check for a test for a named group's having been set, using the Perl
4528          syntax (?(<name>) or (?('name') */          syntax (?(<name>) or (?('name') */
4529    
4530          else if (ptr[1] == '<')          else if (ptr[1] == CHAR_LESS_THAN_SIGN)
4531            {            {
4532            terminator = '>';            terminator = CHAR_GREATER_THAN_SIGN;
4533            ptr++;            ptr++;
4534            }            }
4535          else if (ptr[1] == '\'')          else if (ptr[1] == CHAR_APOSTROPHE)
4536            {            {
4537            terminator = '\'';            terminator = CHAR_APOSTROPHE;
4538            ptr++;            ptr++;
4539            }            }
4540          else          else
4541            {            {
4542            terminator = 0;            terminator = 0;
4543            if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
4544            }            }
4545    
4546          /* We now expect to read a name; anything else is an error */          /* We now expect to read a name; anything else is an error */
# Line 4286  we set the flag only if there is a liter Line 4560  we set the flag only if there is a liter
4560            {            {
4561            if (recno >= 0)            if (recno >= 0)
4562              recno = ((digitab[*ptr] & ctype_digit) != 0)?              recno = ((digitab[*ptr] & ctype_digit) != 0)?
4563                recno * 10 + *ptr - '0' : -1;                recno * 10 + *ptr - CHAR_0 : -1;
4564            ptr++;            ptr++;
4565            }            }
4566          namelen = ptr - name;          namelen = ptr - name;
4567    
4568          if ((terminator > 0 && *ptr++ != terminator) || *ptr++ != ')')          if ((terminator > 0 && *ptr++ != terminator) ||
4569                *ptr++ != CHAR_RIGHT_PARENTHESIS)
4570            {            {
4571            ptr--;      /* Error offset */            ptr--;      /* Error offset */
4572            *errorcodeptr = ERR26;            *errorcodeptr = ERR26;
# Line 4313  we set the flag only if there is a liter Line 4588  we set the flag only if there is a liter
4588              *errorcodeptr = ERR58;              *errorcodeptr = ERR58;
4589              goto FAILED;              goto FAILED;
4590              }              }
4591            recno = (refsign == '-')?            recno = (refsign == CHAR_MINUS)?
4592              cd->bracount - recno + 1 : recno +cd->bracount;              cd->bracount - recno + 1 : recno +cd->bracount;
4593            if (recno <= 0 || recno > cd->final_bracount)            if (recno <= 0 || recno > cd->final_bracount)
4594              {              {
# Line 4344  we set the flag only if there is a liter Line 4619  we set the flag only if there is a liter
4619    
4620          /* Search the pattern for a forward reference */          /* Search the pattern for a forward reference */
4621    
4622          else if ((i = find_parens(ptr, cd->bracount, name, namelen,          else if ((i = find_parens(cd, name, namelen,
4623                          (options & PCRE_EXTENDED) != 0)) > 0)                          (options & PCRE_EXTENDED) != 0)) > 0)
4624            {            {
4625            PUT2(code, 2+LINK_SIZE, i);            PUT2(code, 2+LINK_SIZE, i);
# Line 4365  we set the flag only if there is a liter Line 4640  we set the flag only if there is a liter
4640          /* Check for (?(R) for recursion. Allow digits after R to specify a          /* Check for (?(R) for recursion. Allow digits after R to specify a
4641          specific group number. */          specific group number. */
4642    
4643          else if (*name == 'R')          else if (*name == CHAR_R)
4644            {            {
4645            recno = 0;            recno = 0;
4646            for (i = 1; i < namelen; i++)            for (i = 1; i < namelen; i++)
# Line 4375  we set the flag only if there is a liter Line 4650  we set the flag only if there is a liter
4650                *errorcodeptr = ERR15;                *errorcodeptr = ERR15;
4651                goto FAILED;                goto FAILED;
4652                }                }
4653              recno = recno * 10 + name[i] - '0';              recno = recno * 10 + name[i] - CHAR_0;
4654              }              }
4655            if (recno == 0) recno = RREF_ANY;            if (recno == 0) recno = RREF_ANY;
4656            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */
# Line 4385  we set the flag only if there is a liter Line 4660  we set the flag only if there is a liter
4660          /* Similarly, check for the (?(DEFINE) "condition", which is always          /* Similarly, check for the (?(DEFINE) "condition", which is always
4661          false. */          false. */
4662    
4663          else if (namelen == 6 && strncmp((char *)name, "DEFINE", 6) == 0)          else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)
4664            {            {
4665            code[1+LINK_SIZE] = OP_DEF;            code[1+LINK_SIZE] = OP_DEF;
4666            skipbytes = 1;            skipbytes = 1;
# Line 4410  we set the flag only if there is a liter Line 4685  we set the flag only if there is a liter
4685    
4686    
4687          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4688          case '=':                 /* Positive lookahead */          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
4689          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
4690          ptr++;          ptr++;
4691          break;          break;
4692    
4693    
4694          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4695          case '!':                 /* Negative lookahead */          case CHAR_EXCLAMATION_MARK:            /* Negative lookahead */
4696          ptr++;          ptr++;
4697          if (*ptr == ')')          /* Optimize (?!) */          if (*ptr == CHAR_RIGHT_PARENTHESIS)    /* Optimize (?!) */
4698            {            {
4699            *code++ = OP_FAIL;            *code++ = OP_FAIL;
4700            previous = NULL;            previous = NULL;
# Line 4430  we set the flag only if there is a liter Line 4705  we set the flag only if there is a liter
4705    
4706    
4707          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4708          case '<':                 /* Lookbehind or named define */          case CHAR_LESS_THAN_SIGN:              /* Lookbehind or named define */
4709          switch (ptr[1])          switch (ptr[1])
4710            {            {
4711            case '=':               /* Positive lookbehind */            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
4712            bravalue = OP_ASSERTBACK;            bravalue = OP_ASSERTBACK;
4713            ptr += 2;            ptr += 2;
4714            break;            break;
4715    
4716            case '!':               /* Negative lookbehind */            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
4717            bravalue = OP_ASSERTBACK_NOT;            bravalue = OP_ASSERTBACK_NOT;
4718            ptr += 2;            ptr += 2;
4719            break;            break;
# Line 4453  we set the flag only if there is a liter Line 4728  we set the flag only if there is a liter
4728    
4729    
4730          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4731          case '>':                 /* One-time brackets */          case CHAR_GREATER_THAN_SIGN:           /* One-time brackets */
4732          bravalue = OP_ONCE;          bravalue = OP_ONCE;
4733          ptr++;          ptr++;
4734          break;          break;
4735    
4736    
4737          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4738          case 'C':                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
4739          previous_callout = code;  /* Save for later completion */          previous_callout = code;  /* Save for later completion */
4740          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1; /* Skip one item before completing */
4741          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
4742            {            {
4743            int n = 0;            int n = 0;
4744            while ((digitab[*(++ptr)] & ctype_digit) != 0)            while ((digitab[*(++ptr)] & ctype_digit) != 0)
4745              n = n * 10 + *ptr - '0';              n = n * 10 + *ptr - CHAR_0;
4746            if (*ptr != ')')            if (*ptr != CHAR_RIGHT_PARENTHESIS)
4747              {              {
4748              *errorcodeptr = ERR39;              *errorcodeptr = ERR39;
4749              goto FAILED;              goto FAILED;
# Line 4488  we set the flag only if there is a liter Line 4763  we set the flag only if there is a liter
4763    
4764    
4765          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4766          case 'P':                 /* Python-style named subpattern handling */          case CHAR_P:              /* Python-style named subpattern handling */
4767          if (*(++ptr) == '=' || *ptr == '>')  /* Reference or recursion */          if (*(++ptr) == CHAR_EQUALS_SIGN ||
4768                *ptr == CHAR_GREATER_THAN_SIGN)  /* Reference or recursion */
4769            {            {
4770            is_recurse = *ptr == '>';            is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
4771            terminator = ')';            terminator = CHAR_RIGHT_PARENTHESIS;
4772            goto NAMED_REF_OR_RECURSE;            goto NAMED_REF_OR_RECURSE;
4773            }            }
4774          else if (*ptr != '<')    /* Test for Python-style definition */          else if (*ptr != CHAR_LESS_THAN_SIGN)  /* Test for Python-style defn */
4775            {            {
4776            *errorcodeptr = ERR41;            *errorcodeptr = ERR41;
4777            goto FAILED;            goto FAILED;
# Line 4505  we set the flag only if there is a liter Line 4781  we set the flag only if there is a liter
4781    
4782          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4783          DEFINE_NAME:    /* Come here from (?< handling */          DEFINE_NAME:    /* Come here from (?< handling */
4784          case '\'':          case CHAR_APOSTROPHE:
4785            {            {
4786            terminator = (*ptr == '<')? '>' : '\'';            terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
4787                CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
4788            name = ++ptr;            name = ++ptr;
4789    
4790            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
# Line 4581  we set the flag only if there is a liter Line 4858  we set the flag only if there is a liter
4858    
4859    
4860          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4861          case '&':                 /* Perl recursion/subroutine syntax */          case CHAR_AMPERSAND:            /* Perl recursion/subroutine syntax */
4862          terminator = ')';          terminator = CHAR_RIGHT_PARENTHESIS;
4863          is_recurse = TRUE;          is_recurse = TRUE;
4864          /* Fall through */          /* Fall through */
4865    
# Line 4641  we set the flag only if there is a liter Line 4918  we set the flag only if there is a liter
4918              recno = GET2(slot, 0);              recno = GET2(slot, 0);
4919              }              }
4920            else if ((recno =                /* Forward back reference */            else if ((recno =                /* Forward back reference */
4921                      find_parens(ptr, cd->bracount, name, namelen,                      find_parens(cd, name, namelen,
4922                        (options & PCRE_EXTENDED) != 0)) <= 0)                        (options & PCRE_EXTENDED) != 0)) <= 0)
4923              {              {
4924              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
# Line 4657  we set the flag only if there is a liter Line 4934  we set the flag only if there is a liter
4934    
4935    
4936          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4937          case 'R':                 /* Recursion */          case CHAR_R:              /* Recursion */
4938          ptr++;                    /* Same as (?0)      */          ptr++;                    /* Same as (?0)      */
4939          /* Fall through */          /* Fall through */
4940    
4941    
4942          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4943          case '-': case '+':          case CHAR_MINUS: case CHAR_PLUS:  /* Recursion or subroutine */
4944          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
4945          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
4946            {            {
4947            const uschar *called;            const uschar *called;
4948            terminator = ')';            terminator = CHAR_RIGHT_PARENTHESIS;
4949    
4950            /* Come here from the \g<...> and \g'...' code (Oniguruma            /* Come here from the \g<...> and \g'...' code (Oniguruma
4951            compatibility). However, the syntax has been checked to ensure that            compatibility). However, the syntax has been checked to ensure that
4952            the ... are a (signed) number, so that neither ERR63 nor ERR29 will            the ... are a (signed) number, so that neither ERR63 nor ERR29 will
4953            be called on this path, nor will the jump to OTHER_CHAR_AFTER_QUERY            be called on this path, nor will the jump to OTHER_CHAR_AFTER_QUERY
4954            ever be taken. */            ever be taken. */
   
           HANDLE_NUMERICAL_RECURSION:  
4955    
4956            if ((refsign = *ptr) == '+')            HANDLE_NUMERICAL_RECURSION:
4957    
4958              if ((refsign = *ptr) == CHAR_PLUS)
4959              {              {
4960              ptr++;              ptr++;
4961              if ((digitab[*ptr] & ctype_digit) == 0)              if ((digitab[*ptr] & ctype_digit) == 0)
# Line 4687  we set the flag only if there is a liter Line 4964  we set the flag only if there is a liter
4964                goto FAILED;                goto FAILED;
4965                }                }
4966              }              }
4967            else if (refsign == '-')            else if (refsign == CHAR_MINUS)
4968              {              {
4969              if ((digitab[ptr[1]] & ctype_digit) == 0)              if ((digitab[ptr[1]] & ctype_digit) == 0)
4970                goto OTHER_CHAR_AFTER_QUERY;                goto OTHER_CHAR_AFTER_QUERY;
# Line 4696  we set the flag only if there is a liter Line 4973  we set the flag only if there is a liter
4973    
4974            recno = 0;            recno = 0;
4975            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4976              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - CHAR_0;
4977    
4978            if (*ptr != terminator)            if (*ptr != terminator)
4979              {              {
# Line 4704  we set the flag only if there is a liter Line 4981  we set the flag only if there is a liter
4981              goto FAILED;              goto FAILED;
4982              }              }
4983    
4984            if (refsign == '-')            if (refsign == CHAR_MINUS)
4985              {              {
4986              if (recno == 0)              if (recno == 0)
4987                {                {
# Line 4718  we set the flag only if there is a liter Line 4995  we set the flag only if there is a liter
4995                goto FAILED;                goto FAILED;
4996                }                }
4997              }              }
4998            else if (refsign == '+')            else if (refsign == CHAR_PLUS)
4999              {              {
5000              if (recno == 0)              if (recno == 0)
5001                {                {
# Line 4751  we set the flag only if there is a liter Line 5028  we set the flag only if there is a liter
5028    
5029              if (called == NULL)              if (called == NULL)
5030                {                {
5031                if (find_parens(ptr, cd->bracount, NULL, recno,                if (find_parens(cd, NULL, recno,
5032                     (options & PCRE_EXTENDED) != 0) < 0)                      (options & PCRE_EXTENDED) != 0) < 0)
5033                  {                  {
5034                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
5035                  goto FAILED;                  goto FAILED;
# Line 4804  we set the flag only if there is a liter Line 5081  we set the flag only if there is a liter
5081          set = unset = 0;          set = unset = 0;
5082          optset = &set;          optset = &set;
5083    
5084          while (*ptr != ')' && *ptr != ':')          while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
5085            {            {
5086            switch (*ptr++)            switch (*ptr++)
5087              {              {
5088              case '-': optset = &unset; break;              case CHAR_MINUS: optset = &unset; break;
5089    
5090              case 'J':    /* Record that it changed in the external options */              case CHAR_J:    /* Record that it changed in the external options */
5091              *optset |= PCRE_DUPNAMES;              *optset |= PCRE_DUPNAMES;
5092              cd->external_flags |= PCRE_JCHANGED;              cd->external_flags |= PCRE_JCHANGED;
5093              break;              break;
5094    
5095              case 'i': *optset |= PCRE_CASELESS; break;              case CHAR_i: *optset |= PCRE_CASELESS; break;
5096              case 'm': *optset |= PCRE_MULTILINE; break;              case CHAR_m: *optset |= PCRE_MULTILINE; break;
5097              case 's': *optset |= PCRE_DOTALL; break;              case CHAR_s: *optset |= PCRE_DOTALL; break;
5098              case 'x': *optset |= PCRE_EXTENDED; break;              case CHAR_x: *optset |= PCRE_EXTENDED; break;
5099              case 'U': *optset |= PCRE_UNGREEDY; break;              case CHAR_U: *optset |= PCRE_UNGREEDY; break;
5100              case 'X': *optset |= PCRE_EXTRA; break;              case CHAR_X: *optset |= PCRE_EXTRA; break;
5101    
5102              default:  *errorcodeptr = ERR12;              default:  *errorcodeptr = ERR12;
5103                        ptr--;    /* Correct the offset */                        ptr--;    /* Correct the offset */
# Line 4851  we set the flag only if there is a liter Line 5128  we set the flag only if there is a liter
5128          both phases.          both phases.
5129    
5130          If we are not at the pattern start, compile code to change the ims          If we are not at the pattern start, compile code to change the ims
5131          options if this setting actually changes any of them. We also pass the          options if this setting actually changes any of them, and reset the
5132          new setting back so that it can be put at the start of any following          greedy defaults and the case value for firstbyte and reqbyte. */
         branches, and when this group ends (if we are in a group), a resetting  
         item can be compiled. */  
5133    
5134          if (*ptr == ')')          if (*ptr == CHAR_RIGHT_PARENTHESIS)
5135            {            {
5136            if (code == cd->start_code + 1 + LINK_SIZE &&            if (code == cd->start_code + 1 + LINK_SIZE &&
5137                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
5138              {              {
5139              cd->external_options = newoptions;              cd->external_options = newoptions;
             options = newoptions;  
5140              }              }
5141           else           else
5142              {              {
# Line 4871  we set the flag only if there is a liter Line 5145  we set the flag only if there is a liter
5145                *code++ = OP_OPT;                *code++ = OP_OPT;
5146                *code++ = newoptions & PCRE_IMS;                *code++ = newoptions & PCRE_IMS;
5147                }                }
   
             /* Change options at this level, and pass them back for use  
             in subsequent branches. Reset the greedy defaults and the case  
             value for firstbyte and reqbyte. */  
   
             *optionsptr = options = newoptions;  
5148              greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);              greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
5149              greedy_non_default = greedy_default ^ 1;              greedy_non_default = greedy_default ^ 1;
5150              req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;              req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
5151              }              }
5152    
5153              /* Change options at this level, and pass them back for use
5154              in subsequent branches. When not at the start of the pattern, this
5155              information is also necessary so that a resetting item can be
5156              compiled at the end of a group (if we are in a group). */
5157    
5158              *optionsptr = options = newoptions;
5159            previous = NULL;       /* This item can't be repeated */            previous = NULL;       /* This item can't be repeated */
5160            continue;              /* It is complete */            continue;              /* It is complete */
5161            }            }
# Line 4997  we set the flag only if there is a liter Line 5271  we set the flag only if there is a liter
5271    
5272      /* Error if hit end of pattern */      /* Error if hit end of pattern */
5273    
5274      if (*ptr != ')')      if (*ptr != CHAR_RIGHT_PARENTHESIS)
5275        {        {
5276        *errorcodeptr = ERR14;        *errorcodeptr = ERR14;
5277        goto FAILED;        goto FAILED;
# Line 5094  we set the flag only if there is a liter Line 5368  we set the flag only if there is a liter
5368      back references and those types that consume a character may be repeated.      back references and those types that consume a character may be repeated.
5369      We can test for values between ESC_b and ESC_Z for the latter; this may      We can test for values between ESC_b and ESC_Z for the latter; this may
5370      have to change if any new ones are ever created. */      have to change if any new ones are ever created. */
5371    
5372      case '\\':      case CHAR_BACKSLASH:
5373      tempptr = ptr;      tempptr = ptr;
5374      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);
5375      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
# Line 5104  we set the flag only if there is a liter Line 5378  we set the flag only if there is a liter
5378        {        {
5379        if (-c == ESC_Q)            /* Handle start of quoted string */        if (-c == ESC_Q)            /* Handle start of quoted string */
5380          {          {
5381          if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
5382            else inescq = TRUE;            ptr += 2;               /* avoid empty string */
5383                else inescq = TRUE;
5384          continue;          continue;
5385          }          }
5386    
# Line 5121  we set the flag only if there is a liter Line 5396  we set the flag only if there is a liter
5396    
5397        zerofirstbyte = firstbyte;        zerofirstbyte = firstbyte;
5398        zeroreqbyte = reqbyte;        zeroreqbyte = reqbyte;
5399    
5400        /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'        /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
5401        is a subroutine call by number (Oniguruma syntax). In fact, the value        is a subroutine call by number (Oniguruma syntax). In fact, the value
5402        -ESC_g is returned only for these cases. So we don't need to check for <        -ESC_g is returned only for these cases. So we don't need to check for <
5403        or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is        or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is
5404        -ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as        -ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as
5405        that is a synonym for a named back reference). */        that is a synonym for a named back reference). */
5406    
5407        if (-c == ESC_g)        if (-c == ESC_g)
5408          {          {
5409          const uschar *p;          const uschar *p;
5410          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
5411          terminator = (*(++ptr) == '<')? '>' : '\'';          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5412              CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
5413    
5414          /* These two statements stop the compiler from warning about possibly          /* These two statements stop the compiler from warning about possibly
5415          unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In          unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
5416          fact, because we actually check for a number below, the paths that          fact, because we actually check for a number below, the paths that
5417          would actually be in error are never taken. */          would actually be in error are never taken. */
5418    
5419          skipbytes = 0;          skipbytes = 0;
5420          reset_bracount = FALSE;          reset_bracount = FALSE;
5421    
5422          /* Test for a name */          /* Test for a name */
5423    
5424          if (ptr[1] != '+' && ptr[1] != '-')          if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
5425            {            {
5426            BOOL isnumber = TRUE;            BOOL isnumber = TRUE;
5427            for (p = ptr + 1; *p != 0 && *p != terminator; p++)            for (p = ptr + 1; *p != 0 && *p != terminator; p++)
5428              {              {
5429              if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;              if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;
5430              if ((cd->ctypes[*p] & ctype_word) == 0) break;              if ((cd->ctypes[*p] & ctype_word) == 0) break;
5431              }              }
5432            if (*p != terminator)            if (*p != terminator)
5433              {              {
5434              *errorcodeptr = ERR57;              *errorcodeptr = ERR57;
5435              break;              break;
5436              }              }
5437            if (isnumber)            if (isnumber)
5438              {              {
5439              ptr++;              ptr++;
5440              goto HANDLE_NUMERICAL_RECURSION;              goto HANDLE_NUMERICAL_RECURSION;
5441              }              }
5442            is_recurse = TRUE;            is_recurse = TRUE;
5443            goto NAMED_REF_OR_RECURSE;            goto NAMED_REF_OR_RECURSE;
5444            }            }
5445    
5446          /* Test a signed number in angle brackets or quotes. */          /* Test a signed number in angle brackets or quotes. */
5447    
5448          p = ptr + 2;          p = ptr + 2;
5449          while ((digitab[*p] & ctype_digit) != 0) p++;          while ((digitab[*p] & ctype_digit) != 0) p++;
5450          if (*p != terminator)          if (*p != terminator)
# Line 5176  we set the flag only if there is a liter Line 5452  we set the flag only if there is a liter
5452            *errorcodeptr = ERR57;            *errorcodeptr = ERR57;
5453            break;            break;
5454            }            }
5455          ptr++;          ptr++;
5456          goto HANDLE_NUMERICAL_RECURSION;          goto HANDLE_NUMERICAL_RECURSION;
5457          }          }
5458    
5459        /* \k<name> or \k'name' is a back reference by name (Perl syntax).        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
5460        We also support \k{name} (.NET syntax) */        We also support \k{name} (.NET syntax) */
5461    
5462        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))        if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||
5463              ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))
5464          {          {
5465          is_recurse = FALSE;          is_recurse = FALSE;
5466          terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5467              CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
5468              CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
5469          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
5470          }          }
5471    
# Line 5289  we set the flag only if there is a liter Line 5568  we set the flag only if there is a liter
5568    
5569      /* Remember if \r or \n were seen */      /* Remember if \r or \n were seen */
5570    
5571      if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')      if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
5572        cd->external_flags |= PCRE_HASCRORLF;        cd->external_flags |= PCRE_HASCRORLF;
5573    
5574      /* Set the first and required bytes appropriately. If no previous first      /* Set the first and required bytes appropriately. If no previous first
# Line 5534  for (;;) Line 5813  for (;;)
5813    compile a resetting op-code following, except at the very end of the pattern.    compile a resetting op-code following, except at the very end of the pattern.
5814    Return leaving the pointer at the terminating char. */    Return leaving the pointer at the terminating char. */
5815    
5816    if (*ptr != '|')    if (*ptr != CHAR_VERTICAL_LINE)
5817      {      {
5818      if (lengthptr == NULL)      if (lengthptr == NULL)
5819        {        {
# Line 5557  for (;;) Line 5836  for (;;)
5836    
5837      /* Resetting option if needed */      /* Resetting option if needed */
5838    
5839      if ((options & PCRE_IMS) != oldims && *ptr == ')')      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
5840        {        {
5841        *code++ = OP_OPT;        *code++ = OP_OPT;
5842        *code++ = oldims;        *code++ = oldims;
# Line 5686  do { Line 5965  do {
5965       if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;
5966       }       }
5967    
5968     /* .* is not anchored unless DOTALL is set and it isn't in brackets that     /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
5969     are or may be referenced. */     it isn't in brackets that are or may be referenced. */
5970    
5971     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
5972               op == OP_TYPEPOSSTAR) &&               op == OP_TYPEPOSSTAR))
             (*options & PCRE_DOTALL) != 0)  
5973       {       {
5974       if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;       if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0)
5975           return FALSE;
5976       }       }
5977    
5978     /* Check for explicit anchoring */     /* Check for explicit anchoring */
# Line 5739  do { Line 6018  do {
6018       NULL, 0, FALSE);       NULL, 0, FALSE);
6019     register int op = *scode;     register int op = *scode;
6020    
6021       /* If we are at the start of a conditional assertion group, *both* the
6022       conditional assertion *and* what follows the condition must satisfy the test
6023       for start of line. Other kinds of condition fail. Note that there may be an
6024       auto-callout at the start of a condition. */
6025    
6026       if (op == OP_COND)
6027         {
6028         scode += 1 + LINK_SIZE;
6029         if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
6030         switch (*scode)
6031           {
6032           case OP_CREF:
6033           case OP_RREF:
6034           case OP_DEF:
6035           return FALSE;
6036    
6037           default:     /* Assertion */
6038           if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6039           do scode += GET(scode, 1); while (*scode == OP_ALT);
6040           scode += 1 + LINK_SIZE;
6041           break;
6042           }
6043         scode = first_significant_code(scode, NULL, 0, FALSE);
6044         op = *scode;
6045         }
6046    
6047     /* Non-capturing brackets */     /* Non-capturing brackets */
6048    
6049     if (op == OP_BRA)     if (op == OP_BRA)
# Line 5757  do { Line 6062  do {
6062    
6063     /* Other brackets */     /* Other brackets */
6064    
6065     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE)
6066       { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }       {
6067         if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6068         }
6069    
6070     /* .* means "start at start or after \n" if it isn't in brackets that     /* .* means "start at start or after \n" if it isn't in brackets that
6071     may be referenced. */     may be referenced. */
# Line 5875  Returns:        pointer to compiled data Line 6182  Returns:        pointer to compiled data
6182                  with errorptr and erroroffset set                  with errorptr and erroroffset set
6183  */  */
6184    
6185  PCRE_EXP_DEFN pcre *  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
6186  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
6187    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
6188  {  {
# Line 5883  return pcre_compile2(pattern, options, N Line 6190  return pcre_compile2(pattern, options, N
6190  }  }
6191    
6192    
6193  PCRE_EXP_DEFN pcre *  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
6194  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
6195    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
6196  {  {
# Line 5937  if (erroroffset == NULL) Line 6244  if (erroroffset == NULL)
6244    
6245  *erroroffset = 0;  *erroroffset = 0;
6246    
 /* Can't support UTF8 unless PCRE has been compiled to include the code. */  
   
 #ifdef SUPPORT_UTF8  
 utf8 = (options & PCRE_UTF8) != 0;  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  
      (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)  
   {  
   errorcode = ERR44;  
   goto PCRE_EARLY_ERROR_RETURN2;  
   }  
 #else  
 if ((options & PCRE_UTF8) != 0)  
   {  
   errorcode = ERR32;  
   goto PCRE_EARLY_ERROR_RETURN;  
   }  
 #endif  
   
 if ((options & ~PUBLIC_OPTIONS) != 0)  
   {  
   errorcode = ERR17;  
   goto PCRE_EARLY_ERROR_RETURN;  
   }  
   
6247  /* Set up pointers to the individual character tables */  /* Set up pointers to the individual character tables */
6248    
6249  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = _pcre_default_tables;
# Line 5969  cd->fcc = tables + fcc_offset; Line 6252  cd->fcc = tables + fcc_offset;
6252  cd->cbits = tables + cbits_offset;  cd->cbits = tables + cbits_offset;
6253  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
6254    
6255    /* Check that all undefined public option bits are zero */
6256    
6257    if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
6258      {
6259      errorcode = ERR17;
6260      goto PCRE_EARLY_ERROR_RETURN;
6261      }
6262    
6263  /* Check for global one-time settings at the start of the pattern, and remember  /* Check for global one-time settings at the start of the pattern, and remember
6264  the offset for later. */  the offset for later. */
6265    
6266  while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
6267           ptr[skipatstart+1] == CHAR_ASTERISK)
6268    {    {
6269    int newnl = 0;    int newnl = 0;
6270    int newbsr = 0;    int newbsr = 0;
6271    
6272    if (strncmp((char *)(ptr+skipatstart+2), "CR)", 3) == 0)    if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
6273        { skipatstart += 7; options |= PCRE_UTF8; continue; }
6274    
6275      if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
6276      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
6277    else if (strncmp((char *)(ptr+skipatstart+2), "LF)", 3)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)
6278      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
6279    else if (strncmp((char *)(ptr+skipatstart+2), "CRLF)", 5)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)
6280      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
6281    else if (strncmp((char *)(ptr+skipatstart+2), "ANY)", 4) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)
6282      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
6283    else if (strncmp((char *)(ptr+skipatstart+2), "ANYCRLF)", 8)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)
6284      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
6285    
6286    else if (strncmp((char *)(ptr+skipatstart+2), "BSR_ANYCRLF)", 12) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
6287      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
6288    else if (strncmp((char *)(ptr+skipatstart+2), "BSR_UNICODE)", 12) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
6289      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
6290    
6291    if (newnl != 0)    if (newnl != 0)
# Line 6000  while (ptr[skipatstart] == '(' && ptr[sk Line 6295  while (ptr[skipatstart] == '(' && ptr[sk
6295    else break;    else break;
6296    }    }
6297    
6298    /* Can't support UTF8 unless PCRE has been compiled to include the code. */
6299    
6300    #ifdef SUPPORT_UTF8
6301    utf8 = (options & PCRE_UTF8) != 0;
6302    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
6303         (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
6304      {
6305      errorcode = ERR44;
6306      goto PCRE_EARLY_ERROR_RETURN2;
6307      }
6308    #else
6309    if ((options & PCRE_UTF8) != 0)
6310      {
6311      errorcode = ERR32;
6312      goto PCRE_EARLY_ERROR_RETURN;
6313      }
6314    #endif
6315    
6316  /* Check validity of \R options. */  /* Check validity of \R options. */
6317    
6318  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
# Line 6018  current code allows for fixed one- or tw Line 6331  current code allows for fixed one- or tw
6331  switch (options & PCRE_NEWLINE_BITS)  switch (options & PCRE_NEWLINE_BITS)
6332    {    {
6333    case 0: newline = NEWLINE; break;   /* Build-time default */    case 0: newline = NEWLINE; break;   /* Build-time default */
6334    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6335    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6336    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
6337         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6338    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
6339    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6340    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;

Legend:
Removed from v.334  
changed lines
  Added in v.426

  ViewVC Help
Powered by ViewVC 1.1.5