/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 903 by ph10, Sat Jan 21 16:37:17 2012 UTC revision 1041 by ph10, Sun Sep 16 10:16:27 2012 UTC
# Line 489  static const char error_texts[] = Line 489  static const char error_texts[] =
489    "too many forward references\0"    "too many forward references\0"
490    "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"    "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
491    "invalid UTF-16 string\0"    "invalid UTF-16 string\0"
492      /* 75 */
493      "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
494      "character value in \\u.... sequence is too large\0"
495    ;    ;
496    
497  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 786  else if ((i = escapes[c - CHAR_0]) != 0) Line 789  else if ((i = escapes[c - CHAR_0]) != 0)
789    
790  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
791  /* Not alphanumeric */  /* Not alphanumeric */
792  else if (c < 'a' || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}  else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
793  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  c = i;
794  #endif  #endif
795    
# Line 829  else Line 832  else
832            c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
833  #endif  #endif
834            }            }
835    
836    #ifdef COMPILE_PCRE8
837            if (c > (utf ? 0x10ffff : 0xff))
838    #else
839    #ifdef COMPILE_PCRE16
840            if (c > (utf ? 0x10ffff : 0xffff))
841    #endif
842    #endif
843              {
844              *errorcodeptr = ERR76;
845              }
846            else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
847          }          }
848        }        }
849      else      else
# Line 2225  for (;;) Line 2240  for (;;)
2240        {        {
2241        case OP_CHAR:        case OP_CHAR:
2242        case OP_CHARI:        case OP_CHARI:
2243          case OP_NOT:
2244          case OP_NOTI:
2245        case OP_EXACT:        case OP_EXACT:
2246        case OP_EXACTI:        case OP_EXACTI:
2247          case OP_NOTEXACT:
2248          case OP_NOTEXACTI:
2249        case OP_UPTO:        case OP_UPTO:
2250        case OP_UPTOI:        case OP_UPTOI:
2251          case OP_NOTUPTO:
2252          case OP_NOTUPTOI:
2253        case OP_MINUPTO:        case OP_MINUPTO:
2254        case OP_MINUPTOI:        case OP_MINUPTOI:
2255          case OP_NOTMINUPTO:
2256          case OP_NOTMINUPTOI:
2257        case OP_POSUPTO:        case OP_POSUPTO:
2258        case OP_POSUPTOI:        case OP_POSUPTOI:
2259          case OP_NOTPOSUPTO:
2260          case OP_NOTPOSUPTOI:
2261        case OP_STAR:        case OP_STAR:
2262        case OP_STARI:        case OP_STARI:
2263          case OP_NOTSTAR:
2264          case OP_NOTSTARI:
2265        case OP_MINSTAR:        case OP_MINSTAR:
2266        case OP_MINSTARI:        case OP_MINSTARI:
2267          case OP_NOTMINSTAR:
2268          case OP_NOTMINSTARI:
2269        case OP_POSSTAR:        case OP_POSSTAR:
2270        case OP_POSSTARI:        case OP_POSSTARI:
2271          case OP_NOTPOSSTAR:
2272          case OP_NOTPOSSTARI:
2273        case OP_PLUS:        case OP_PLUS:
2274        case OP_PLUSI:        case OP_PLUSI:
2275          case OP_NOTPLUS:
2276          case OP_NOTPLUSI:
2277        case OP_MINPLUS:        case OP_MINPLUS:
2278        case OP_MINPLUSI:        case OP_MINPLUSI:
2279          case OP_NOTMINPLUS:
2280          case OP_NOTMINPLUSI:
2281        case OP_POSPLUS:        case OP_POSPLUS:
2282        case OP_POSPLUSI:        case OP_POSPLUSI:
2283          case OP_NOTPOSPLUS:
2284          case OP_NOTPOSPLUSI:
2285        case OP_QUERY:        case OP_QUERY:
2286        case OP_QUERYI:        case OP_QUERYI:
2287          case OP_NOTQUERY:
2288          case OP_NOTQUERYI:
2289        case OP_MINQUERY:        case OP_MINQUERY:
2290        case OP_MINQUERYI:        case OP_MINQUERYI:
2291          case OP_NOTMINQUERY:
2292          case OP_NOTMINQUERYI:
2293        case OP_POSQUERY:        case OP_POSQUERY:
2294        case OP_POSQUERYI:        case OP_POSQUERYI:
2295          case OP_NOTPOSQUERY:
2296          case OP_NOTPOSQUERYI:
2297        if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);        if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
2298        break;        break;
2299        }        }
# Line 3067  if (next >= 0) switch(op_code) Line 3110  if (next >= 0) switch(op_code)
3110      }      }
3111    else    else
3112  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3113    return (c != TABLE_GET(next, cd->fcc, next));  /* Non-UTF-8 mode */    return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */
   
   /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These  
   opcodes are not used for multi-byte characters, because they are coded using  
   an XCLASS instead. */  
3114    
3115    case OP_NOT:    case OP_NOT:
3116    return (c = *previous) == next;  #ifdef SUPPORT_UTF
3117      GETCHARTEST(c, previous);
3118    #else
3119      c = *previous;
3120    #endif
3121      return c == next;
3122    
3123    case OP_NOTI:    case OP_NOTI:
3124    if ((c = *previous) == next) return TRUE;  #ifdef SUPPORT_UTF
3125      GETCHARTEST(c, previous);
3126    #else
3127      c = *previous;
3128    #endif
3129      if (c == next) return TRUE;
3130  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3131    if (utf)    if (utf)
3132      {      {
3133      unsigned int othercase;      unsigned int othercase;
3134      if (next < 128) othercase = cd->fcc[next]; else      if (next < 128) othercase = cd->fcc[next]; else
3135  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3136      othercase = UCD_OTHERCASE(next);      othercase = UCD_OTHERCASE((unsigned int)next);
3137  #else  #else
3138      othercase = NOTACHAR;      othercase = NOTACHAR;
3139  #endif  #endif
# Line 3092  if (next >= 0) switch(op_code) Line 3141  if (next >= 0) switch(op_code)
3141      }      }
3142    else    else
3143  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3144    return (c == TABLE_GET(next, cd->fcc, next));  /* Non-UTF-8 mode */    return (c == TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */
3145    
3146    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
3147    When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */    When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
3148    
3149    case OP_DIGIT:    case OP_DIGIT:
3150    return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;    return next > 255 || (cd->ctypes[next] & ctype_digit) == 0;
3151    
3152    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
3153    return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0;
3154    
3155    case OP_WHITESPACE:    case OP_WHITESPACE:
3156    return next > 127 || (cd->ctypes[next] & ctype_space) == 0;    return next > 255 || (cd->ctypes[next] & ctype_space) == 0;
3157    
3158    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
3159    return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_space) != 0;
3160    
3161    case OP_WORDCHAR:    case OP_WORDCHAR:
3162    return next > 127 || (cd->ctypes[next] & ctype_word) == 0;    return next > 255 || (cd->ctypes[next] & ctype_word) == 0;
3163    
3164    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
3165    return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_word) != 0;
3166    
3167    case OP_HSPACE:    case OP_HSPACE:
3168    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
3169    switch(next)    switch(next)
3170      {      {
3171      case 0x09:      HSPACE_CASES:
     case 0x20:  
     case 0xa0:  
     case 0x1680:  
     case 0x180e:  
     case 0x2000:  
     case 0x2001:  
     case 0x2002:  
     case 0x2003:  
     case 0x2004:  
     case 0x2005:  
     case 0x2006:  
     case 0x2007:  
     case 0x2008:  
     case 0x2009:  
     case 0x200A:  
     case 0x202f:  
     case 0x205f:  
     case 0x3000:  
3172      return op_code == OP_NOT_HSPACE;      return op_code == OP_NOT_HSPACE;
3173    
3174      default:      default:
3175      return op_code != OP_NOT_HSPACE;      return op_code != OP_NOT_HSPACE;
3176      }      }
# Line 3148  if (next >= 0) switch(op_code) Line 3180  if (next >= 0) switch(op_code)
3180    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
3181    switch(next)    switch(next)
3182      {      {
3183      case 0x0a:      VSPACE_CASES:
     case 0x0b:  
     case 0x0c:  
     case 0x0d:  
     case 0x85:  
     case 0x2028:  
     case 0x2029:  
3184      return op_code == OP_NOT_VSPACE;      return op_code == OP_NOT_VSPACE;
3185    
3186      default:      default:
3187      return op_code != OP_NOT_VSPACE;      return op_code != OP_NOT_VSPACE;
3188      }      }
# Line 3191  switch(op_code) Line 3218  switch(op_code)
3218    switch(-next)    switch(-next)
3219      {      {
3220      case ESC_d:      case ESC_d:
3221      return c > 127 || (cd->ctypes[c] & ctype_digit) == 0;      return c > 255 || (cd->ctypes[c] & ctype_digit) == 0;
3222    
3223      case ESC_D:      case ESC_D:
3224      return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0;
3225    
3226      case ESC_s:      case ESC_s:
3227      return c > 127 || (cd->ctypes[c] & ctype_space) == 0;      return c > 255 || (cd->ctypes[c] & ctype_space) == 0;
3228    
3229      case ESC_S:      case ESC_S:
3230      return c <= 127 && (cd->ctypes[c] & ctype_space) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_space) != 0;
3231    
3232      case ESC_w:      case ESC_w:
3233      return c > 127 || (cd->ctypes[c] & ctype_word) == 0;      return c > 255 || (cd->ctypes[c] & ctype_word) == 0;
3234    
3235      case ESC_W:      case ESC_W:
3236      return c <= 127 && (cd->ctypes[c] & ctype_word) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_word) != 0;
3237    
3238      case ESC_h:      case ESC_h:
3239      case ESC_H:      case ESC_H:
3240      switch(c)      switch(c)
3241        {        {
3242        case 0x09:        HSPACE_CASES:
       case 0x20:  
       case 0xa0:  
       case 0x1680:  
       case 0x180e:  
       case 0x2000:  
       case 0x2001:  
       case 0x2002:  
       case 0x2003:  
       case 0x2004:  
       case 0x2005:  
       case 0x2006:  
       case 0x2007:  
       case 0x2008:  
       case 0x2009:  
       case 0x200A:  
       case 0x202f:  
       case 0x205f:  
       case 0x3000:  
3243        return -next != ESC_h;        return -next != ESC_h;
3244    
3245        default:        default:
3246        return -next == ESC_h;        return -next == ESC_h;
3247        }        }
# Line 3240  switch(op_code) Line 3250  switch(op_code)
3250      case ESC_V:      case ESC_V:
3251      switch(c)      switch(c)
3252        {        {
3253        case 0x0a:        VSPACE_CASES:
       case 0x0b:  
       case 0x0c:  
       case 0x0d:  
       case 0x85:  
       case 0x2028:  
       case 0x2029:  
3254        return -next != ESC_v;        return -next != ESC_v;
3255    
3256        default:        default:
3257        return -next == ESC_v;        return -next == ESC_v;
3258        }        }
# Line 3315  switch(op_code) Line 3320  switch(op_code)
3320    return next == -ESC_d;    return next == -ESC_d;
3321    
3322    case OP_WHITESPACE:    case OP_WHITESPACE:
3323    return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R;    return next == -ESC_S || next == -ESC_d || next == -ESC_w;
3324    
3325    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
3326    return next == -ESC_s || next == -ESC_h || next == -ESC_v;    return next == -ESC_s || next == -ESC_h || next == -ESC_v || next == -ESC_R;
3327    
3328    case OP_HSPACE:    case OP_HSPACE:
3329    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||
# Line 4008  for (;; ptr++) Line 4013  for (;; ptr++)
4013    
4014              /* Perl 5.004 onwards omits VT from \s, but we must preserve it              /* Perl 5.004 onwards omits VT from \s, but we must preserve it
4015              if it was previously set by something earlier in the character              if it was previously set by something earlier in the character
4016              class. */              class. Luckily, the value of CHAR_VT is 0x0b in both ASCII and
4017                EBCDIC, so we lazily just adjust the appropriate bit. */
4018    
4019              case ESC_s:              case ESC_s:
4020              classbits[0] |= cbits[cbit_space];              classbits[0] |= cbits[cbit_space];
# Line 4023  for (;; ptr++) Line 4029  for (;; ptr++)
4029              continue;              continue;
4030    
4031              case ESC_h:              case ESC_h:
4032              SETBIT(classbits, 0x09); /* VT */              SETBIT(classbits, CHAR_HT);
4033              SETBIT(classbits, 0x20); /* SPACE */              SETBIT(classbits, CHAR_SPACE);
4034    #ifndef EBCDIC
4035              SETBIT(classbits, 0xa0); /* NBSP */              SETBIT(classbits, 0xa0); /* NBSP */
4036  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
4037              xclass = TRUE;              xclass = TRUE;
# Line 4060  for (;; ptr++) Line 4067  for (;; ptr++)
4067                class_uchardata += PRIV(ord2utf)(0x3000, class_uchardata);                class_uchardata += PRIV(ord2utf)(0x3000, class_uchardata);
4068                }                }
4069  #endif  #endif
4070    #endif  /* Not EBCDIC */
4071              continue;              continue;
4072    
4073              case ESC_H:              case ESC_H:
# Line 4068  for (;; ptr++) Line 4076  for (;; ptr++)
4076                int x = 0xff;                int x = 0xff;
4077                switch (c)                switch (c)
4078                  {                  {
4079                  case 0x09/8: x ^= 1 << (0x09%8); break;                  case CHAR_HT/8:    x ^= 1 << (CHAR_HT%8); break;
4080                  case 0x20/8: x ^= 1 << (0x20%8); break;                  case CHAR_SPACE/8: x ^= 1 << (CHAR_SPACE%8); break;
4081                  case 0xa0/8: x ^= 1 << (0xa0%8); break;  #ifndef EBCDIC
4082                    case 0xa0/8: x ^= 1 << (0xa0%8); break;  /* NBSP */
4083    #endif
4084                  default: break;                  default: break;
4085                  }                  }
4086                classbits[c] |= x;                classbits[c] |= x;
4087                }                }
4088    #ifndef EBCDIC
4089  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
4090              xclass = TRUE;              xclass = TRUE;
4091              *class_uchardata++ = XCL_RANGE;              *class_uchardata++ = XCL_RANGE;
# Line 4101  for (;; ptr++) Line 4112  for (;; ptr++)
4112              if (utf)              if (utf)
4113                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
4114              else              else
4115  #endif  #endif   /* SUPPORT_UTF */
4116                *class_uchardata++ = 0xffff;                *class_uchardata++ = 0xffff;
4117  #elif defined SUPPORT_UTF  #elif defined SUPPORT_UTF
4118              if (utf)              if (utf)
# Line 4130  for (;; ptr++) Line 4141  for (;; ptr++)
4141                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
4142                }                }
4143  #endif  #endif
4144    #endif  /* Not EBCDIC */
4145              continue;              continue;
4146    
4147              case ESC_v:              case ESC_v:
4148              SETBIT(classbits, 0x0a); /* LF */              SETBIT(classbits, CHAR_LF);
4149              SETBIT(classbits, 0x0b); /* VT */              SETBIT(classbits, CHAR_VT);
4150              SETBIT(classbits, 0x0c); /* FF */              SETBIT(classbits, CHAR_FF);
4151              SETBIT(classbits, 0x0d); /* CR */              SETBIT(classbits, CHAR_CR);
4152              SETBIT(classbits, 0x85); /* NEL */              SETBIT(classbits, CHAR_NEL);
4153    #ifndef EBCDIC
4154  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
4155              xclass = TRUE;              xclass = TRUE;
4156              *class_uchardata++ = XCL_RANGE;              *class_uchardata++ = XCL_RANGE;
# Line 4152  for (;; ptr++) Line 4165  for (;; ptr++)
4165                class_uchardata += PRIV(ord2utf)(0x2029, class_uchardata);                class_uchardata += PRIV(ord2utf)(0x2029, class_uchardata);
4166                }                }
4167  #endif  #endif
4168    #endif  /* Not EBCDIC */
4169              continue;              continue;
4170    
4171              case ESC_V:              case ESC_V:
# Line 4160  for (;; ptr++) Line 4174  for (;; ptr++)
4174                int x = 0xff;                int x = 0xff;
4175                switch (c)                switch (c)
4176                  {                  {
4177                  case 0x0a/8: x ^= 1 << (0x0a%8);                  case CHAR_LF/8: x ^= 1 << (CHAR_LF%8);
4178                               x ^= 1 << (0x0b%8);                                  x ^= 1 << (CHAR_VT%8);
4179                               x ^= 1 << (0x0c%8);                                  x ^= 1 << (CHAR_FF%8);
4180                               x ^= 1 << (0x0d%8);                                  x ^= 1 << (CHAR_CR%8);
4181                               break;                                  break;
4182                  case 0x85/8: x ^= 1 << (0x85%8); break;                  case CHAR_NEL/8: x ^= 1 << (CHAR_NEL%8); break;
4183                  default: break;                  default: break;
4184                  }                  }
4185                classbits[c] |= x;                classbits[c] |= x;
4186                }                }
4187    
4188    #ifndef EBCDIC
4189  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
4190              xclass = TRUE;              xclass = TRUE;
4191              *class_uchardata++ = XCL_RANGE;              *class_uchardata++ = XCL_RANGE;
# Line 4196  for (;; ptr++) Line 4211  for (;; ptr++)
4211                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
4212                }                }
4213  #endif  #endif
4214    #endif  /* Not EBCDIC */
4215              continue;              continue;
4216    
4217  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 4482  for (;; ptr++) Line 4498  for (;; ptr++)
4498        LONE_SINGLE_CHARACTER:        LONE_SINGLE_CHARACTER:
4499    
4500        /* Only the value of 1 matters for class_single_char. */        /* Only the value of 1 matters for class_single_char. */
4501    
4502        if (class_single_char < 2) class_single_char++;        if (class_single_char < 2) class_single_char++;
4503    
4504        /* If class_charcount is 1, we saw precisely one character. As long as        /* If class_charcount is 1, we saw precisely one character. As long as
4505        there were no negated characters >= 128 and there was no use of \p or \P,        there was no use of \p or \P, in other words, no use of any XCLASS
4506        in other words, no use of any XCLASS features, we can optimize.        features, we can optimize.
   
       In UTF-8 mode, we can optimize the negative case only if there were no  
       characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR  
       operate on single-bytes characters only. This is an historical hangover.  
       Maybe one day we can tidy these opcodes to handle multi-byte characters.  
4507    
4508        The optimization throws away the bit map. We turn the item into a        The optimization throws away the bit map. We turn the item into a
4509        1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.        1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
4510        Note that OP_NOT[I] does not support multibyte characters. In the positive        In the positive case, it can cause firstchar to be set. Otherwise, there
4511        case, it can cause firstchar to be set. Otherwise, there can be no first        can be no first char if this item is first, whatever repeat count may
4512        char if this item is first, whatever repeat count may follow. In the case        follow. In the case of reqchar, save the previous value for reinstating. */
       of reqchar, save the previous value for reinstating. */  
4513    
 #ifdef SUPPORT_UTF  
       if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET  
         && (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))  
 #else  
4514        if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)        if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
 #endif  
4515          {          {
4516          ptr++;          ptr++;
4517          zeroreqchar = reqchar;          zeroreqchar = reqchar;
4518    
         /* The OP_NOT[I] opcodes work on single characters only. */  
   
4519          if (negate_class)          if (negate_class)
4520            {            {
4521            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
4522            zerofirstchar = firstchar;            zerofirstchar = firstchar;
4523            *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;            *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
4524            *code++ = c;  #ifdef SUPPORT_UTF
4525              if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
4526                code += PRIV(ord2utf)(c, code);
4527              else
4528    #endif
4529                *code++ = c;
4530            goto NOT_CHAR;            goto NOT_CHAR;
4531            }            }
4532    
# Line 4571  for (;; ptr++) Line 4580  for (;; ptr++)
4580  #endif  #endif
4581            {            {
4582            unsigned int othercase;            unsigned int othercase;
4583            if ((othercase = UCD_OTHERCASE(c)) != c)            if ((int)(othercase = UCD_OTHERCASE(c)) != c)
4584              {              {
4585              *class_uchardata++ = XCL_SINGLE;              *class_uchardata++ = XCL_SINGLE;
4586              class_uchardata += PRIV(ord2utf)(othercase, class_uchardata);              class_uchardata += PRIV(ord2utf)(othercase, class_uchardata);
# Line 4775  for (;; ptr++) Line 4784  for (;; ptr++)
4784    
4785      /* Now handle repetition for the different types of item. */      /* Now handle repetition for the different types of item. */
4786    
4787      /* If previous was a character match, abolish the item and generate a      /* If previous was a character or negated character match, abolish the item
4788      repeat item instead. If a char item has a minumum of more than one, ensure      and generate a repeat item instead. If a char item has a minimum of more
4789      that it is set in reqchar - it might not be if a sequence such as x{3} is      than one, ensure that it is set in reqchar - it might not be if a sequence
4790      the first thing in a branch because the x will have gone into firstchar      such as x{3} is the first thing in a branch because the x will have gone
4791      instead.  */      into firstchar instead.  */
4792    
4793      if (*previous == OP_CHAR || *previous == OP_CHARI)      if (*previous == OP_CHAR || *previous == OP_CHARI
4794        {          || *previous == OP_NOT || *previous == OP_NOTI)
4795        op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR;        {
4796          switch (*previous)
4797            {
4798            default: /* Make compiler happy. */
4799            case OP_CHAR:  op_type = OP_STAR - OP_STAR; break;
4800            case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
4801            case OP_NOT:   op_type = OP_NOTSTAR - OP_STAR; break;
4802            case OP_NOTI:  op_type = OP_NOTSTARI - OP_STAR; break;
4803            }
4804    
4805        /* Deal with UTF characters that take up more than one character. It's        /* Deal with UTF characters that take up more than one character. It's
4806        easier to write this out separately than try to macrify it. Use c to        easier to write this out separately than try to macrify it. Use c to
# Line 4806  for (;; ptr++) Line 4823  for (;; ptr++)
4823        with UTF disabled, or for a single character UTF character. */        with UTF disabled, or for a single character UTF character. */
4824          {          {
4825          c = code[-1];          c = code[-1];
4826          if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;          if (*previous <= OP_CHARI && repeat_min > 1)
4827              reqchar = c | req_caseopt | cd->req_varyopt;
4828          }          }
4829    
4830        /* If the repetition is unlimited, it pays to see if the next thing on        /* If the repetition is unlimited, it pays to see if the next thing on
# Line 4825  for (;; ptr++) Line 4843  for (;; ptr++)
4843        goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */        goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */
4844        }        }
4845    
     /* If previous was a single negated character ([^a] or similar), we use  
     one of the special opcodes, replacing it. The code is shared with single-  
     character repeats by setting opt_type to add a suitable offset into  
     repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI  
     are currently used only for single-byte chars. */  
   
     else if (*previous == OP_NOT || *previous == OP_NOTI)  
       {  
       op_type = ((*previous == OP_NOT)? OP_NOTSTAR : OP_NOTSTARI) - OP_STAR;  
       c = previous[1];  
       if (!possessive_quantifier &&  
           repeat_max < 0 &&  
           check_auto_possessive(previous, utf, ptr + 1, options, cd))  
         {  
         repeat_type = 0;    /* Force greedy */  
         possessive_quantifier = TRUE;  
         }  
       goto OUTPUT_SINGLE_REPEAT;  
       }  
   
4846      /* If previous was a character type match (\d or similar), abolish it and      /* If previous was a character type match (\d or similar), abolish it and
4847      create a suitable repeat item. The code is shared with single-character      create a suitable repeat item. The code is shared with single-character
4848      repeats by setting op_type to add a suitable offset into repeat_type. Note      repeats by setting op_type to add a suitable offset into repeat_type. Note
# Line 5585  for (;; ptr++) Line 5583  for (;; ptr++)
5583          arg = ++ptr;          arg = ++ptr;
5584          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
5585          arglen = (int)(ptr - arg);          arglen = (int)(ptr - arg);
5586            if (arglen > (int)MAX_MARK)
5587              {
5588              *errorcodeptr = ERR75;
5589              goto FAILED;
5590              }
5591          }          }
5592    
5593        if (*ptr != CHAR_RIGHT_PARENTHESIS)        if (*ptr != CHAR_RIGHT_PARENTHESIS)
# Line 5600  for (;; ptr++) Line 5603  for (;; ptr++)
5603          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
5604              STRNCMP_UC_C8(name, vn, namelen) == 0)              STRNCMP_UC_C8(name, vn, namelen) == 0)
5605            {            {
5606              int setverb;
5607    
5608            /* Check for open captures before ACCEPT and convert it to            /* Check for open captures before ACCEPT and convert it to
5609            ASSERT_ACCEPT if in an assertion. */            ASSERT_ACCEPT if in an assertion. */
5610    
# Line 5617  for (;; ptr++) Line 5622  for (;; ptr++)
5622                *code++ = OP_CLOSE;                *code++ = OP_CLOSE;
5623                PUT2INC(code, 0, oc->number);                PUT2INC(code, 0, oc->number);
5624                }                }
5625              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;              setverb = *code++ =
5626                  (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
5627    
5628              /* Do not set firstchar after *ACCEPT */              /* Do not set firstchar after *ACCEPT */
5629              if (firstchar == REQ_UNSET) firstchar = REQ_NONE;              if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
# Line 5632  for (;; ptr++) Line 5638  for (;; ptr++)
5638                *errorcodeptr = ERR66;                *errorcodeptr = ERR66;
5639                goto FAILED;                goto FAILED;
5640                }                }
5641              *code = verbs[i].op;              setverb = *code++ = verbs[i].op;
             if (*code++ == OP_THEN) cd->external_flags |= PCRE_HASTHEN;  
5642              }              }
5643    
5644            else            else
# Line 5643  for (;; ptr++) Line 5648  for (;; ptr++)
5648                *errorcodeptr = ERR59;                *errorcodeptr = ERR59;
5649                goto FAILED;                goto FAILED;
5650                }                }
5651              *code = verbs[i].op_arg;              setverb = *code++ = verbs[i].op_arg;
             if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;  
5652              *code++ = arglen;              *code++ = arglen;
5653              memcpy(code, arg, IN_UCHARS(arglen));              memcpy(code, arg, IN_UCHARS(arglen));
5654              code += arglen;              code += arglen;
5655              *code++ = 0;              *code++ = 0;
5656              }              }
5657    
5658              switch (setverb)
5659                {
5660                case OP_THEN:
5661                case OP_THEN_ARG:
5662                cd->external_flags |= PCRE_HASTHEN;
5663                break;
5664    
5665                case OP_PRUNE:
5666                case OP_PRUNE_ARG:
5667                case OP_SKIP:
5668                case OP_SKIP_ARG:
5669                cd->had_pruneorskip = TRUE;
5670                break;
5671                }
5672    
5673            break;  /* Found verb, exit loop */            break;  /* Found verb, exit loop */
5674            }            }
5675    
# Line 6836  for (;; ptr++) Line 6855  for (;; ptr++)
6855        /* For the rest (including \X when Unicode properties are supported), we        /* For the rest (including \X when Unicode properties are supported), we
6856        can obtain the OP value by negating the escape value in the default        can obtain the OP value by negating the escape value in the default
6857        situation when PCRE_UCP is not set. When it *is* set, we substitute        situation when PCRE_UCP is not set. When it *is* set, we substitute
6858        Unicode property tests. */        Unicode property tests. Note that \b and \B do a one-character
6859          lookbehind. */
6860    
6861        else        else
6862          {          {
6863            if ((-c == ESC_b || -c == ESC_B) && cd->max_lookbehind == 0)
6864              cd->max_lookbehind = 1;
6865  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6866          if (-c >= ESC_DU && -c <= ESC_wu)          if (-c >= ESC_DU && -c <= ESC_wu)
6867            {            {
# Line 7147  for (;;) Line 7169  for (;;)
7169          *ptrptr = ptr;          *ptrptr = ptr;
7170          return FALSE;          return FALSE;
7171          }          }
7172        else { PUT(reverse_count, 0, fixed_length); }        else
7173            {
7174            if (fixed_length > cd->max_lookbehind)
7175              cd->max_lookbehind = fixed_length;
7176            PUT(reverse_count, 0, fixed_length);
7177            }
7178        }        }
7179      }      }
7180    
# Line 7279  and the highest back reference was great Line 7306  and the highest back reference was great
7306  However, by keeping a bitmap of the first 31 back references, we can catch some  However, by keeping a bitmap of the first 31 back references, we can catch some
7307  of the more common cases more precisely.  of the more common cases more precisely.
7308    
7309    ... A second exception is when the .* appears inside an atomic group, because
7310    this prevents the number of characters it matches from being adjusted.
7311    
7312  Arguments:  Arguments:
7313    code           points to start of expression (the bracket)    code           points to start of expression (the bracket)
7314    bracket_map    a bitmap of which brackets we are inside while testing; this    bracket_map    a bitmap of which brackets we are inside while testing; this
7315                    handles up to substring 31; after that we just have to take                    handles up to substring 31; after that we just have to take
7316                    the less precise approach                    the less precise approach
7317    backref_map    the back reference bitmap    cd             points to the compile data block
7318      atomcount      atomic group level
7319    
7320  Returns:     TRUE or FALSE  Returns:     TRUE or FALSE
7321  */  */
7322    
7323  static BOOL  static BOOL
7324  is_anchored(register const pcre_uchar *code, unsigned int bracket_map,  is_anchored(register const pcre_uchar *code, unsigned int bracket_map,
7325    unsigned int backref_map)    compile_data *cd, int atomcount)
7326  {  {
7327  do {  do {
7328     const pcre_uchar *scode = first_significant_code(     const pcre_uchar *scode = first_significant_code(
# Line 7303  do { Line 7334  do {
7334     if (op == OP_BRA  || op == OP_BRAPOS ||     if (op == OP_BRA  || op == OP_BRAPOS ||
7335         op == OP_SBRA || op == OP_SBRAPOS)         op == OP_SBRA || op == OP_SBRAPOS)
7336       {       {
7337       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
7338       }       }
7339    
7340     /* Capturing brackets */     /* Capturing brackets */
# Line 7313  do { Line 7344  do {
7344       {       {
7345       int n = GET2(scode, 1+LINK_SIZE);       int n = GET2(scode, 1+LINK_SIZE);
7346       int new_map = bracket_map | ((n < 32)? (1 << n) : 1);       int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
7347       if (!is_anchored(scode, new_map, backref_map)) return FALSE;       if (!is_anchored(scode, new_map, cd, atomcount)) return FALSE;
7348         }
7349    
7350       /* Positive forward assertions and conditions */
7351    
7352       else if (op == OP_ASSERT || op == OP_COND)
7353         {
7354         if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
7355       }       }
7356    
7357     /* Other brackets */     /* Atomic groups */
7358    
7359     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC ||     else if (op == OP_ONCE || op == OP_ONCE_NC)
             op == OP_COND)  
7360       {       {
7361       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, cd, atomcount + 1))
7362           return FALSE;
7363       }       }
7364    
7365     /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and     /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
7366     it isn't in brackets that are or may be referenced. */     it isn't in brackets that are or may be referenced or inside an atomic
7367       group. */
7368    
7369     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
7370               op == OP_TYPEPOSSTAR))               op == OP_TYPEPOSSTAR))
7371       {       {
7372       if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0)       if (scode[1] != OP_ALLANY || (bracket_map & cd->backref_map) != 0 ||
7373             atomcount > 0 || cd->had_pruneorskip)
7374         return FALSE;         return FALSE;
7375       }       }
7376    
7377     /* Check for explicit anchoring */     /* Check for explicit anchoring */
7378    
7379     else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE;     else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE;
7380    
7381     code += GET(code, 1);     code += GET(code, 1);
7382     }     }
7383  while (*code == OP_ALT);   /* Loop for each alternative */  while (*code == OP_ALT);   /* Loop for each alternative */
# Line 7354  return TRUE; Line 7395  return TRUE;
7395  matching and for non-DOTALL patterns that start with .* (which must start at  matching and for non-DOTALL patterns that start with .* (which must start at
7396  the beginning or after \n). As in the case of is_anchored() (see above), we  the beginning or after \n). As in the case of is_anchored() (see above), we
7397  have to take account of back references to capturing brackets that contain .*  have to take account of back references to capturing brackets that contain .*
7398  because in that case we can't make the assumption.  because in that case we can't make the assumption. Also, the appearance of .*
7399    inside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not
7400    count, because once again the assumption no longer holds.
7401    
7402  Arguments:  Arguments:
7403    code           points to start of expression (the bracket)    code           points to start of expression (the bracket)
7404    bracket_map    a bitmap of which brackets we are inside while testing; this    bracket_map    a bitmap of which brackets we are inside while testing; this
7405                    handles up to substring 31; after that we just have to take                    handles up to substring 31; after that we just have to take
7406                    the less precise approach                    the less precise approach
7407    backref_map    the back reference bitmap    cd             points to the compile data
7408      atomcount      atomic group level
7409    
7410  Returns:         TRUE or FALSE  Returns:         TRUE or FALSE
7411  */  */
7412    
7413  static BOOL  static BOOL
7414  is_startline(const pcre_uchar *code, unsigned int bracket_map,  is_startline(const pcre_uchar *code, unsigned int bracket_map,
7415    unsigned int backref_map)    compile_data *cd, int atomcount)
7416  {  {
7417  do {  do {
7418     const pcre_uchar *scode = first_significant_code(     const pcre_uchar *scode = first_significant_code(
# Line 7394  do { Line 7438  do {
7438         return FALSE;         return FALSE;
7439    
7440         default:     /* Assertion */         default:     /* Assertion */
7441         if (!is_startline(scode, bracket_map, backref_map)) return FALSE;         if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
7442         do scode += GET(scode, 1); while (*scode == OP_ALT);         do scode += GET(scode, 1); while (*scode == OP_ALT);
7443         scode += 1 + LINK_SIZE;         scode += 1 + LINK_SIZE;
7444         break;         break;
# Line 7408  do { Line 7452  do {
7452     if (op == OP_BRA  || op == OP_BRAPOS ||     if (op == OP_BRA  || op == OP_BRAPOS ||
7453         op == OP_SBRA || op == OP_SBRAPOS)         op == OP_SBRA || op == OP_SBRAPOS)
7454       {       {
7455       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
7456       }       }
7457    
7458     /* Capturing brackets */     /* Capturing brackets */
# Line 7418  do { Line 7462  do {
7462       {       {
7463       int n = GET2(scode, 1+LINK_SIZE);       int n = GET2(scode, 1+LINK_SIZE);
7464       int new_map = bracket_map | ((n < 32)? (1 << n) : 1);       int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
7465       if (!is_startline(scode, new_map, backref_map)) return FALSE;       if (!is_startline(scode, new_map, cd, atomcount)) return FALSE;
7466       }       }
7467    
7468     /* Other brackets */     /* Positive forward assertions */
7469    
7470     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC)     else if (op == OP_ASSERT)
7471       {       {
7472       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
7473       }       }
7474    
7475       /* Atomic brackets */
7476    
7477     /* .* means "start at start or after \n" if it isn't in brackets that     else if (op == OP_ONCE || op == OP_ONCE_NC)
7478     may be referenced. */       {
7479         if (!is_startline(scode, bracket_map, cd, atomcount + 1)) return FALSE;
7480         }
7481    
7482       /* .* means "start at start or after \n" if it isn't in atomic brackets or
7483       brackets that may be referenced, as long as the pattern does not contain
7484       *PRUNE or *SKIP, because these break the feature. Consider, for example,
7485       /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the
7486       start of a line. */
7487    
7488     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
7489       {       {
7490       if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;       if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 ||
7491             atomcount > 0 || cd->had_pruneorskip)
7492           return FALSE;
7493       }       }
7494    
7495     /* Check for explicit circumflex */     /* Check for explicit circumflex; anything else gives a FALSE result. Note
7496       that atomic brackets OP_ONCE and OP_ONCE_NC are handled above: a .* inside
7497       them never counts, because the number of characters it matches cannot be
7498       adjusted. */
7499    
7500     else if (op != OP_CIRC && op != OP_CIRCM) return FALSE;     else if (op != OP_CIRC && op != OP_CIRCM) return FALSE;
7501    
# Line 7817  cd->start_pattern = (const pcre_uchar *) Line 7876  cd->start_pattern = (const pcre_uchar *)
7876  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
7877  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7878  cd->assert_depth = 0;  cd->assert_depth = 0;
7879    cd->max_lookbehind = 0;
7880  cd->external_options = options;  cd->external_options = options;
7881  cd->external_flags = 0;  cd->external_flags = 0;
7882  cd->open_caps = NULL;  cd->open_caps = NULL;
# Line 7867  re->magic_number = MAGIC_NUMBER; Line 7927  re->magic_number = MAGIC_NUMBER;
7927  re->size = (int)size;  re->size = (int)size;
7928  re->options = cd->external_options;  re->options = cd->external_options;
7929  re->flags = cd->external_flags;  re->flags = cd->external_flags;
 re->dummy1 = 0;  
7930  re->first_char = 0;  re->first_char = 0;
7931  re->req_char = 0;  re->req_char = 0;
7932  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
# Line 7887  field; this time it's used for rememberi Line 7946  field; this time it's used for rememberi
7946  cd->final_bracount = cd->bracount;  /* Save for checking forward references */  cd->final_bracount = cd->bracount;  /* Save for checking forward references */
7947  cd->assert_depth = 0;  cd->assert_depth = 0;
7948  cd->bracount = 0;  cd->bracount = 0;
7949    cd->max_lookbehind = 0;
7950  cd->names_found = 0;  cd->names_found = 0;
7951  cd->name_table = (pcre_uchar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
7952  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
# Line 7894  cd->start_code = codestart; Line 7954  cd->start_code = codestart;
7954  cd->hwm = (pcre_uchar *)(cd->start_workspace);  cd->hwm = (pcre_uchar *)(cd->start_workspace);
7955  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7956  cd->had_accept = FALSE;  cd->had_accept = FALSE;
7957    cd->had_pruneorskip = FALSE;
7958  cd->check_lookbehind = FALSE;  cd->check_lookbehind = FALSE;
7959  cd->open_caps = NULL;  cd->open_caps = NULL;
7960    
# Line 7908  code = (pcre_uchar *)codestart; Line 7969  code = (pcre_uchar *)codestart;
7969    &firstchar, &reqchar, NULL, cd, NULL);    &firstchar, &reqchar, NULL, cd, NULL);
7970  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
7971  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
7972    re->max_lookbehind = cd->max_lookbehind;
7973  re->flags = cd->external_flags | PCRE_MODE;  re->flags = cd->external_flags | PCRE_MODE;
7974    
7975  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
# Line 7995  if (cd->check_lookbehind) Line 8057  if (cd->check_lookbehind)
8057                      (fixed_length == -4)? ERR70 : ERR25;                      (fixed_length == -4)? ERR70 : ERR25;
8058          break;          break;
8059          }          }
8060          if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length;
8061        PUT(cc, 1, fixed_length);        PUT(cc, 1, fixed_length);
8062        }        }
8063      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
# Line 8015  if (errorcode != 0) Line 8078  if (errorcode != 0)
8078    }    }
8079    
8080  /* If the anchored option was not passed, set the flag if we can determine that  /* If the anchored option was not passed, set the flag if we can determine that
8081  the pattern is anchored by virtue of ^ characters or \A or anything else (such  the pattern is anchored by virtue of ^ characters or \A or anything else, such
8082  as starting with .* when DOTALL is set).  as starting with non-atomic .* when DOTALL is set and there are no occurrences
8083    of *PRUNE or *SKIP.
8084    
8085  Otherwise, if we know what the first byte has to be, save it, because that  Otherwise, if we know what the first byte has to be, save it, because that
8086  speeds up unanchored matches no end. If not, see if we can set the  speeds up unanchored matches no end. If not, see if we can set the
8087  PCRE_STARTLINE flag. This is helpful for multiline matches when all branches  PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
8088  start with ^. and also when all branches start with .* for non-DOTALL matches.  start with ^, and also when all branches start with non-atomic .* for
8089  */  non-DOTALL matches when *PRUNE and *SKIP are not present. */
8090    
8091  if ((re->options & PCRE_ANCHORED) == 0)  if ((re->options & PCRE_ANCHORED) == 0)
8092    {    {
8093    if (is_anchored(codestart, 0, cd->backref_map))    if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED;
     re->options |= PCRE_ANCHORED;  
8094    else    else
8095      {      {
8096      if (firstchar < 0)      if (firstchar < 0)
# Line 8064  if ((re->options & PCRE_ANCHORED) == 0) Line 8127  if ((re->options & PCRE_ANCHORED) == 0)
8127    
8128        re->flags |= PCRE_FIRSTSET;        re->flags |= PCRE_FIRSTSET;
8129        }        }
8130      else if (is_startline(codestart, 0, cd->backref_map))  
8131        re->flags |= PCRE_STARTLINE;      else if (is_startline(codestart, 0, cd, 0)) re->flags |= PCRE_STARTLINE;
8132      }      }
8133    }    }
8134    
# Line 8134  if ((re->flags & PCRE_REQCHSET) != 0) Line 8197  if ((re->flags & PCRE_REQCHSET) != 0)
8197    }    }
8198    
8199  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
8200  pcre_printint(re, stdout, TRUE);  pcre_printint((pcre *)re, stdout, TRUE);
8201  #else  #else
8202  pcre16_printint(re, stdout, TRUE);  pcre16_printint((pcre *)re, stdout, TRUE);
8203  #endif  #endif
8204    
8205  /* This check is done here in the debugging case so that the code that  /* This check is done here in the debugging case so that the code that

Legend:
Removed from v.903  
changed lines
  Added in v.1041

  ViewVC Help
Powered by ViewVC 1.1.5