/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 903 by ph10, Sat Jan 21 16:37:17 2012 UTC revision 1033 by ph10, Mon Sep 10 11:02:48 2012 UTC
# Line 489  static const char error_texts[] = Line 489  static const char error_texts[] =
489    "too many forward references\0"    "too many forward references\0"
490    "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"    "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
491    "invalid UTF-16 string\0"    "invalid UTF-16 string\0"
492      /* 75 */
493      "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
494      "character value in \\u.... sequence is too large\0"
495    ;    ;
496    
497  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 786  else if ((i = escapes[c - CHAR_0]) != 0) Line 789  else if ((i = escapes[c - CHAR_0]) != 0)
789    
790  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
791  /* Not alphanumeric */  /* Not alphanumeric */
792  else if (c < 'a' || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}  else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
793  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  c = i;
794  #endif  #endif
795    
# Line 829  else Line 832  else
832            c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
833  #endif  #endif
834            }            }
835    
836    #ifdef COMPILE_PCRE8
837            if (c > (utf ? 0x10ffff : 0xff))
838    #else
839    #ifdef COMPILE_PCRE16
840            if (c > (utf ? 0x10ffff : 0xffff))
841    #endif
842    #endif
843              {
844              *errorcodeptr = ERR76;
845              }
846            else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
847          }          }
848        }        }
849      else      else
# Line 2225  for (;;) Line 2240  for (;;)
2240        {        {
2241        case OP_CHAR:        case OP_CHAR:
2242        case OP_CHARI:        case OP_CHARI:
2243          case OP_NOT:
2244          case OP_NOTI:
2245        case OP_EXACT:        case OP_EXACT:
2246        case OP_EXACTI:        case OP_EXACTI:
2247          case OP_NOTEXACT:
2248          case OP_NOTEXACTI:
2249        case OP_UPTO:        case OP_UPTO:
2250        case OP_UPTOI:        case OP_UPTOI:
2251          case OP_NOTUPTO:
2252          case OP_NOTUPTOI:
2253        case OP_MINUPTO:        case OP_MINUPTO:
2254        case OP_MINUPTOI:        case OP_MINUPTOI:
2255          case OP_NOTMINUPTO:
2256          case OP_NOTMINUPTOI:
2257        case OP_POSUPTO:        case OP_POSUPTO:
2258        case OP_POSUPTOI:        case OP_POSUPTOI:
2259          case OP_NOTPOSUPTO:
2260          case OP_NOTPOSUPTOI:
2261        case OP_STAR:        case OP_STAR:
2262        case OP_STARI:        case OP_STARI:
2263          case OP_NOTSTAR:
2264          case OP_NOTSTARI:
2265        case OP_MINSTAR:        case OP_MINSTAR:
2266        case OP_MINSTARI:        case OP_MINSTARI:
2267          case OP_NOTMINSTAR:
2268          case OP_NOTMINSTARI:
2269        case OP_POSSTAR:        case OP_POSSTAR:
2270        case OP_POSSTARI:        case OP_POSSTARI:
2271          case OP_NOTPOSSTAR:
2272          case OP_NOTPOSSTARI:
2273        case OP_PLUS:        case OP_PLUS:
2274        case OP_PLUSI:        case OP_PLUSI:
2275          case OP_NOTPLUS:
2276          case OP_NOTPLUSI:
2277        case OP_MINPLUS:        case OP_MINPLUS:
2278        case OP_MINPLUSI:        case OP_MINPLUSI:
2279          case OP_NOTMINPLUS:
2280          case OP_NOTMINPLUSI:
2281        case OP_POSPLUS:        case OP_POSPLUS:
2282        case OP_POSPLUSI:        case OP_POSPLUSI:
2283          case OP_NOTPOSPLUS:
2284          case OP_NOTPOSPLUSI:
2285        case OP_QUERY:        case OP_QUERY:
2286        case OP_QUERYI:        case OP_QUERYI:
2287          case OP_NOTQUERY:
2288          case OP_NOTQUERYI:
2289        case OP_MINQUERY:        case OP_MINQUERY:
2290        case OP_MINQUERYI:        case OP_MINQUERYI:
2291          case OP_NOTMINQUERY:
2292          case OP_NOTMINQUERYI:
2293        case OP_POSQUERY:        case OP_POSQUERY:
2294        case OP_POSQUERYI:        case OP_POSQUERYI:
2295          case OP_NOTPOSQUERY:
2296          case OP_NOTPOSQUERYI:
2297        if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);        if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
2298        break;        break;
2299        }        }
# Line 3067  if (next >= 0) switch(op_code) Line 3110  if (next >= 0) switch(op_code)
3110      }      }
3111    else    else
3112  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3113    return (c != TABLE_GET(next, cd->fcc, next));  /* Non-UTF-8 mode */    return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */
   
   /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These  
   opcodes are not used for multi-byte characters, because they are coded using  
   an XCLASS instead. */  
3114    
3115    case OP_NOT:    case OP_NOT:
3116    return (c = *previous) == next;  #ifdef SUPPORT_UTF
3117      GETCHARTEST(c, previous);
3118    #else
3119      c = *previous;
3120    #endif
3121      return c == next;
3122    
3123    case OP_NOTI:    case OP_NOTI:
3124    if ((c = *previous) == next) return TRUE;  #ifdef SUPPORT_UTF
3125      GETCHARTEST(c, previous);
3126    #else
3127      c = *previous;
3128    #endif
3129      if (c == next) return TRUE;
3130  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3131    if (utf)    if (utf)
3132      {      {
3133      unsigned int othercase;      unsigned int othercase;
3134      if (next < 128) othercase = cd->fcc[next]; else      if (next < 128) othercase = cd->fcc[next]; else
3135  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3136      othercase = UCD_OTHERCASE(next);      othercase = UCD_OTHERCASE((unsigned int)next);
3137  #else  #else
3138      othercase = NOTACHAR;      othercase = NOTACHAR;
3139  #endif  #endif
# Line 3092  if (next >= 0) switch(op_code) Line 3141  if (next >= 0) switch(op_code)
3141      }      }
3142    else    else
3143  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3144    return (c == TABLE_GET(next, cd->fcc, next));  /* Non-UTF-8 mode */    return (c == TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */
3145    
3146    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
3147    When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */    When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
3148    
3149    case OP_DIGIT:    case OP_DIGIT:
3150    return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;    return next > 255 || (cd->ctypes[next] & ctype_digit) == 0;
3151    
3152    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
3153    return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0;
3154    
3155    case OP_WHITESPACE:    case OP_WHITESPACE:
3156    return next > 127 || (cd->ctypes[next] & ctype_space) == 0;    return next > 255 || (cd->ctypes[next] & ctype_space) == 0;
3157    
3158    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
3159    return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_space) != 0;
3160    
3161    case OP_WORDCHAR:    case OP_WORDCHAR:
3162    return next > 127 || (cd->ctypes[next] & ctype_word) == 0;    return next > 255 || (cd->ctypes[next] & ctype_word) == 0;
3163    
3164    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
3165    return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_word) != 0;
3166    
3167    case OP_HSPACE:    case OP_HSPACE:
3168    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
3169    switch(next)    switch(next)
3170      {      {
3171      case 0x09:      case CHAR_HT:
3172      case 0x20:      case CHAR_SPACE:
3173    #ifndef EBCDIC
3174      case 0xa0:      case 0xa0:
3175      case 0x1680:      case 0x1680:
3176      case 0x180e:      case 0x180e:
# Line 3138  if (next >= 0) switch(op_code) Line 3188  if (next >= 0) switch(op_code)
3188      case 0x202f:      case 0x202f:
3189      case 0x205f:      case 0x205f:
3190      case 0x3000:      case 0x3000:
3191    #endif  /* Not EBCDIC */
3192      return op_code == OP_NOT_HSPACE;      return op_code == OP_NOT_HSPACE;
3193      default:      default:
3194      return op_code != OP_NOT_HSPACE;      return op_code != OP_NOT_HSPACE;
# Line 3148  if (next >= 0) switch(op_code) Line 3199  if (next >= 0) switch(op_code)
3199    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
3200    switch(next)    switch(next)
3201      {      {
3202      case 0x0a:      case CHAR_LF:
3203      case 0x0b:      case CHAR_VT:
3204      case 0x0c:      case CHAR_FF:
3205      case 0x0d:      case CHAR_CR:
3206      case 0x85:      case CHAR_NEL:
3207    #ifndef EBCDIC
3208      case 0x2028:      case 0x2028:
3209      case 0x2029:      case 0x2029:
3210    #endif
3211      return op_code == OP_NOT_VSPACE;      return op_code == OP_NOT_VSPACE;
3212      default:      default:
3213      return op_code != OP_NOT_VSPACE;      return op_code != OP_NOT_VSPACE;
# Line 3191  switch(op_code) Line 3244  switch(op_code)
3244    switch(-next)    switch(-next)
3245      {      {
3246      case ESC_d:      case ESC_d:
3247      return c > 127 || (cd->ctypes[c] & ctype_digit) == 0;      return c > 255 || (cd->ctypes[c] & ctype_digit) == 0;
3248    
3249      case ESC_D:      case ESC_D:
3250      return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0;
3251    
3252      case ESC_s:      case ESC_s:
3253      return c > 127 || (cd->ctypes[c] & ctype_space) == 0;      return c > 255 || (cd->ctypes[c] & ctype_space) == 0;
3254    
3255      case ESC_S:      case ESC_S:
3256      return c <= 127 && (cd->ctypes[c] & ctype_space) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_space) != 0;
3257    
3258      case ESC_w:      case ESC_w:
3259      return c > 127 || (cd->ctypes[c] & ctype_word) == 0;      return c > 255 || (cd->ctypes[c] & ctype_word) == 0;
3260    
3261      case ESC_W:      case ESC_W:
3262      return c <= 127 && (cd->ctypes[c] & ctype_word) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_word) != 0;
3263    
3264      case ESC_h:      case ESC_h:
3265      case ESC_H:      case ESC_H:
3266      switch(c)      switch(c)
3267        {        {
3268        case 0x09:        case CHAR_HT:
3269        case 0x20:        case CHAR_SPACE:
3270    #ifndef EBCDIC
3271        case 0xa0:        case 0xa0:
3272        case 0x1680:        case 0x1680:
3273        case 0x180e:        case 0x180e:
# Line 3231  switch(op_code) Line 3285  switch(op_code)
3285        case 0x202f:        case 0x202f:
3286        case 0x205f:        case 0x205f:
3287        case 0x3000:        case 0x3000:
3288    #endif  /* Not EBCDIC */
3289        return -next != ESC_h;        return -next != ESC_h;
3290        default:        default:
3291        return -next == ESC_h;        return -next == ESC_h;
# Line 3240  switch(op_code) Line 3295  switch(op_code)
3295      case ESC_V:      case ESC_V:
3296      switch(c)      switch(c)
3297        {        {
3298        case 0x0a:        case CHAR_LF:
3299        case 0x0b:        case CHAR_VT:
3300        case 0x0c:        case CHAR_FF:
3301        case 0x0d:        case CHAR_CR:
3302        case 0x85:        case CHAR_NEL:
3303    #ifndef EBCDIC
3304        case 0x2028:        case 0x2028:
3305        case 0x2029:        case 0x2029:
3306    #endif  /* Not EBCDIC */
3307        return -next != ESC_v;        return -next != ESC_v;
3308        default:        default:
3309        return -next == ESC_v;        return -next == ESC_v;
# Line 3315  switch(op_code) Line 3372  switch(op_code)
3372    return next == -ESC_d;    return next == -ESC_d;
3373    
3374    case OP_WHITESPACE:    case OP_WHITESPACE:
3375    return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R;    return next == -ESC_S || next == -ESC_d || next == -ESC_w;
3376    
3377    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
3378    return next == -ESC_s || next == -ESC_h || next == -ESC_v;    return next == -ESC_s || next == -ESC_h || next == -ESC_v || next == -ESC_R;
3379    
3380    case OP_HSPACE:    case OP_HSPACE:
3381    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||
# Line 4008  for (;; ptr++) Line 4065  for (;; ptr++)
4065    
4066              /* Perl 5.004 onwards omits VT from \s, but we must preserve it              /* Perl 5.004 onwards omits VT from \s, but we must preserve it
4067              if it was previously set by something earlier in the character              if it was previously set by something earlier in the character
4068              class. */              class. Luckily, the value of CHAR_VT is 0x0b in both ASCII and
4069                EBCDIC, so we lazily just adjust the appropriate bit. */
4070    
4071              case ESC_s:              case ESC_s:
4072              classbits[0] |= cbits[cbit_space];              classbits[0] |= cbits[cbit_space];
# Line 4023  for (;; ptr++) Line 4081  for (;; ptr++)
4081              continue;              continue;
4082    
4083              case ESC_h:              case ESC_h:
4084              SETBIT(classbits, 0x09); /* VT */              SETBIT(classbits, CHAR_HT);
4085              SETBIT(classbits, 0x20); /* SPACE */              SETBIT(classbits, CHAR_SPACE);
4086    #ifndef EBCDIC
4087              SETBIT(classbits, 0xa0); /* NSBP */              SETBIT(classbits, 0xa0); /* NSBP */
4088  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
4089              xclass = TRUE;              xclass = TRUE;
# Line 4060  for (;; ptr++) Line 4119  for (;; ptr++)
4119                class_uchardata += PRIV(ord2utf)(0x3000, class_uchardata);                class_uchardata += PRIV(ord2utf)(0x3000, class_uchardata);
4120                }                }
4121  #endif  #endif
4122    #endif  /* Not EBCDIC */
4123              continue;              continue;
4124    
4125              case ESC_H:              case ESC_H:
# Line 4068  for (;; ptr++) Line 4128  for (;; ptr++)
4128                int x = 0xff;                int x = 0xff;
4129                switch (c)                switch (c)
4130                  {                  {
4131                  case 0x09/8: x ^= 1 << (0x09%8); break;                  case CHAR_HT/8:    x ^= 1 << (CHAR_HT%8); break;
4132                  case 0x20/8: x ^= 1 << (0x20%8); break;                  case CHAR_SPACE/8: x ^= 1 << (CHAR_SPACE%8); break;
4133                  case 0xa0/8: x ^= 1 << (0xa0%8); break;  #ifndef EBCDIC
4134                    case 0xa0/8: x ^= 1 << (0xa0%8); break;  /* NSBSP */
4135    #endif
4136                  default: break;                  default: break;
4137                  }                  }
4138                classbits[c] |= x;                classbits[c] |= x;
4139                }                }
4140    #ifndef EBCDIC
4141  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
4142              xclass = TRUE;              xclass = TRUE;
4143              *class_uchardata++ = XCL_RANGE;              *class_uchardata++ = XCL_RANGE;
# Line 4101  for (;; ptr++) Line 4164  for (;; ptr++)
4164              if (utf)              if (utf)
4165                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
4166              else              else
4167  #endif  #endif   /* SUPPORT_UTF */
4168                *class_uchardata++ = 0xffff;                *class_uchardata++ = 0xffff;
4169  #elif defined SUPPORT_UTF  #elif defined SUPPORT_UTF
4170              if (utf)              if (utf)
# Line 4130  for (;; ptr++) Line 4193  for (;; ptr++)
4193                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
4194                }                }
4195  #endif  #endif
4196    #endif  /* Not EBCDIC */
4197              continue;              continue;
4198    
4199              case ESC_v:              case ESC_v:
4200              SETBIT(classbits, 0x0a); /* LF */              SETBIT(classbits, CHAR_LF);
4201              SETBIT(classbits, 0x0b); /* VT */              SETBIT(classbits, CHAR_VT);
4202              SETBIT(classbits, 0x0c); /* FF */              SETBIT(classbits, CHAR_FF);
4203              SETBIT(classbits, 0x0d); /* CR */              SETBIT(classbits, CHAR_CR);
4204              SETBIT(classbits, 0x85); /* NEL */              SETBIT(classbits, CHAR_NEL);
4205    #ifndef EBCDIC
4206  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
4207              xclass = TRUE;              xclass = TRUE;
4208              *class_uchardata++ = XCL_RANGE;              *class_uchardata++ = XCL_RANGE;
# Line 4152  for (;; ptr++) Line 4217  for (;; ptr++)
4217                class_uchardata += PRIV(ord2utf)(0x2029, class_uchardata);                class_uchardata += PRIV(ord2utf)(0x2029, class_uchardata);
4218                }                }
4219  #endif  #endif
4220    #endif  /* Not EBCDIC */
4221              continue;              continue;
4222    
4223              case ESC_V:              case ESC_V:
# Line 4160  for (;; ptr++) Line 4226  for (;; ptr++)
4226                int x = 0xff;                int x = 0xff;
4227                switch (c)                switch (c)
4228                  {                  {
4229                  case 0x0a/8: x ^= 1 << (0x0a%8);                  case CHAR_LF/8: x ^= 1 << (CHAR_LF%8);
4230                               x ^= 1 << (0x0b%8);                                  x ^= 1 << (CHAR_VT%8);
4231                               x ^= 1 << (0x0c%8);                                  x ^= 1 << (CHAR_FF%8);
4232                               x ^= 1 << (0x0d%8);                                  x ^= 1 << (CHAR_CR%8);
4233                               break;                                  break;
4234                  case 0x85/8: x ^= 1 << (0x85%8); break;                  case CHAR_NEL/8: x ^= 1 << (CHAR_NEL%8); break;
4235                  default: break;                  default: break;
4236                  }                  }
4237                classbits[c] |= x;                classbits[c] |= x;
4238                }                }
4239    
4240    #ifndef EBCDIC
4241  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
4242              xclass = TRUE;              xclass = TRUE;
4243              *class_uchardata++ = XCL_RANGE;              *class_uchardata++ = XCL_RANGE;
# Line 4196  for (;; ptr++) Line 4263  for (;; ptr++)
4263                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);                class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
4264                }                }
4265  #endif  #endif
4266    #endif  /* Not EBCDIC */
4267              continue;              continue;
4268    
4269  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 4482  for (;; ptr++) Line 4550  for (;; ptr++)
4550        LONE_SINGLE_CHARACTER:        LONE_SINGLE_CHARACTER:
4551    
4552        /* Only the value of 1 matters for class_single_char. */        /* Only the value of 1 matters for class_single_char. */
4553    
4554        if (class_single_char < 2) class_single_char++;        if (class_single_char < 2) class_single_char++;
4555    
4556        /* If class_charcount is 1, we saw precisely one character. As long as        /* If class_charcount is 1, we saw precisely one character. As long as
4557        there were no negated characters >= 128 and there was no use of \p or \P,        there was no use of \p or \P, in other words, no use of any XCLASS
4558        in other words, no use of any XCLASS features, we can optimize.        features, we can optimize.
   
       In UTF-8 mode, we can optimize the negative case only if there were no  
       characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR  
       operate on single-bytes characters only. This is an historical hangover.  
       Maybe one day we can tidy these opcodes to handle multi-byte characters.  
4559    
4560        The optimization throws away the bit map. We turn the item into a        The optimization throws away the bit map. We turn the item into a
4561        1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.        1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
4562        Note that OP_NOT[I] does not support multibyte characters. In the positive        In the positive case, it can cause firstchar to be set. Otherwise, there
4563        case, it can cause firstchar to be set. Otherwise, there can be no first        can be no first char if this item is first, whatever repeat count may
4564        char if this item is first, whatever repeat count may follow. In the case        follow. In the case of reqchar, save the previous value for reinstating. */
       of reqchar, save the previous value for reinstating. */  
4565    
 #ifdef SUPPORT_UTF  
       if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET  
         && (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))  
 #else  
4566        if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)        if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
 #endif  
4567          {          {
4568          ptr++;          ptr++;
4569          zeroreqchar = reqchar;          zeroreqchar = reqchar;
4570    
         /* The OP_NOT[I] opcodes work on single characters only. */  
   
4571          if (negate_class)          if (negate_class)
4572            {            {
4573            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
4574            zerofirstchar = firstchar;            zerofirstchar = firstchar;
4575            *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;            *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
4576            *code++ = c;  #ifdef SUPPORT_UTF
4577              if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
4578                code += PRIV(ord2utf)(c, code);
4579              else
4580    #endif
4581                *code++ = c;
4582            goto NOT_CHAR;            goto NOT_CHAR;
4583            }            }
4584    
# Line 4571  for (;; ptr++) Line 4632  for (;; ptr++)
4632  #endif  #endif
4633            {            {
4634            unsigned int othercase;            unsigned int othercase;
4635            if ((othercase = UCD_OTHERCASE(c)) != c)            if ((int)(othercase = UCD_OTHERCASE(c)) != c)
4636              {              {
4637              *class_uchardata++ = XCL_SINGLE;              *class_uchardata++ = XCL_SINGLE;
4638              class_uchardata += PRIV(ord2utf)(othercase, class_uchardata);              class_uchardata += PRIV(ord2utf)(othercase, class_uchardata);
# Line 4775  for (;; ptr++) Line 4836  for (;; ptr++)
4836    
4837      /* Now handle repetition for the different types of item. */      /* Now handle repetition for the different types of item. */
4838    
4839      /* If previous was a character match, abolish the item and generate a      /* If previous was a character or negated character match, abolish the item
4840      repeat item instead. If a char item has a minumum of more than one, ensure      and generate a repeat item instead. If a char item has a minimum of more
4841      that it is set in reqchar - it might not be if a sequence such as x{3} is      than one, ensure that it is set in reqchar - it might not be if a sequence
4842      the first thing in a branch because the x will have gone into firstchar      such as x{3} is the first thing in a branch because the x will have gone
4843      instead.  */      into firstchar instead.  */
4844    
4845      if (*previous == OP_CHAR || *previous == OP_CHARI)      if (*previous == OP_CHAR || *previous == OP_CHARI
4846        {          || *previous == OP_NOT || *previous == OP_NOTI)
4847        op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR;        {
4848          switch (*previous)
4849            {
4850            default: /* Make compiler happy. */
4851            case OP_CHAR:  op_type = OP_STAR - OP_STAR; break;
4852            case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
4853            case OP_NOT:   op_type = OP_NOTSTAR - OP_STAR; break;
4854            case OP_NOTI:  op_type = OP_NOTSTARI - OP_STAR; break;
4855            }
4856    
4857        /* Deal with UTF characters that take up more than one character. It's        /* Deal with UTF characters that take up more than one character. It's
4858        easier to write this out separately than try to macrify it. Use c to        easier to write this out separately than try to macrify it. Use c to
# Line 4806  for (;; ptr++) Line 4875  for (;; ptr++)
4875        with UTF disabled, or for a single character UTF character. */        with UTF disabled, or for a single character UTF character. */
4876          {          {
4877          c = code[-1];          c = code[-1];
4878          if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;          if (*previous <= OP_CHARI && repeat_min > 1)
4879              reqchar = c | req_caseopt | cd->req_varyopt;
4880          }          }
4881    
4882        /* If the repetition is unlimited, it pays to see if the next thing on        /* If the repetition is unlimited, it pays to see if the next thing on
# Line 4825  for (;; ptr++) Line 4895  for (;; ptr++)
4895        goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */        goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */
4896        }        }
4897    
     /* If previous was a single negated character ([^a] or similar), we use  
     one of the special opcodes, replacing it. The code is shared with single-  
     character repeats by setting opt_type to add a suitable offset into  
     repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI  
     are currently used only for single-byte chars. */  
   
     else if (*previous == OP_NOT || *previous == OP_NOTI)  
       {  
       op_type = ((*previous == OP_NOT)? OP_NOTSTAR : OP_NOTSTARI) - OP_STAR;  
       c = previous[1];  
       if (!possessive_quantifier &&  
           repeat_max < 0 &&  
           check_auto_possessive(previous, utf, ptr + 1, options, cd))  
         {  
         repeat_type = 0;    /* Force greedy */  
         possessive_quantifier = TRUE;  
         }  
       goto OUTPUT_SINGLE_REPEAT;  
       }  
   
4898      /* If previous was a character type match (\d or similar), abolish it and      /* If previous was a character type match (\d or similar), abolish it and
4899      create a suitable repeat item. The code is shared with single-character      create a suitable repeat item. The code is shared with single-character
4900      repeats by setting op_type to add a suitable offset into repeat_type. Note      repeats by setting op_type to add a suitable offset into repeat_type. Note
# Line 5585  for (;; ptr++) Line 5635  for (;; ptr++)
5635          arg = ++ptr;          arg = ++ptr;
5636          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
5637          arglen = (int)(ptr - arg);          arglen = (int)(ptr - arg);
5638            if (arglen > (int)MAX_MARK)
5639              {
5640              *errorcodeptr = ERR75;
5641              goto FAILED;
5642              }
5643          }          }
5644    
5645        if (*ptr != CHAR_RIGHT_PARENTHESIS)        if (*ptr != CHAR_RIGHT_PARENTHESIS)
# Line 5600  for (;; ptr++) Line 5655  for (;; ptr++)
5655          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
5656              STRNCMP_UC_C8(name, vn, namelen) == 0)              STRNCMP_UC_C8(name, vn, namelen) == 0)
5657            {            {
5658              int setverb;
5659    
5660            /* Check for open captures before ACCEPT and convert it to            /* Check for open captures before ACCEPT and convert it to
5661            ASSERT_ACCEPT if in an assertion. */            ASSERT_ACCEPT if in an assertion. */
5662    
# Line 5617  for (;; ptr++) Line 5674  for (;; ptr++)
5674                *code++ = OP_CLOSE;                *code++ = OP_CLOSE;
5675                PUT2INC(code, 0, oc->number);                PUT2INC(code, 0, oc->number);
5676                }                }
5677              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;              setverb = *code++ =
5678                  (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
5679    
5680              /* Do not set firstchar after *ACCEPT */              /* Do not set firstchar after *ACCEPT */
5681              if (firstchar == REQ_UNSET) firstchar = REQ_NONE;              if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
# Line 5632  for (;; ptr++) Line 5690  for (;; ptr++)
5690                *errorcodeptr = ERR66;                *errorcodeptr = ERR66;
5691                goto FAILED;                goto FAILED;
5692                }                }
5693              *code = verbs[i].op;              setverb = *code++ = verbs[i].op;
             if (*code++ == OP_THEN) cd->external_flags |= PCRE_HASTHEN;  
5694              }              }
5695    
5696            else            else
# Line 5643  for (;; ptr++) Line 5700  for (;; ptr++)
5700                *errorcodeptr = ERR59;                *errorcodeptr = ERR59;
5701                goto FAILED;                goto FAILED;
5702                }                }
5703              *code = verbs[i].op_arg;              setverb = *code++ = verbs[i].op_arg;
             if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;  
5704              *code++ = arglen;              *code++ = arglen;
5705              memcpy(code, arg, IN_UCHARS(arglen));              memcpy(code, arg, IN_UCHARS(arglen));
5706              code += arglen;              code += arglen;
5707              *code++ = 0;              *code++ = 0;
5708              }              }
5709    
5710              switch (setverb)
5711                {
5712                case OP_THEN:
5713                case OP_THEN_ARG:
5714                cd->external_flags |= PCRE_HASTHEN;
5715                break;
5716    
5717                case OP_PRUNE:
5718                case OP_PRUNE_ARG:
5719                case OP_SKIP:
5720                case OP_SKIP_ARG:
5721                cd->had_pruneorskip = TRUE;
5722                break;
5723                }
5724    
5725            break;  /* Found verb, exit loop */            break;  /* Found verb, exit loop */
5726            }            }
5727    
# Line 6836  for (;; ptr++) Line 6907  for (;; ptr++)
6907        /* For the rest (including \X when Unicode properties are supported), we        /* For the rest (including \X when Unicode properties are supported), we
6908        can obtain the OP value by negating the escape value in the default        can obtain the OP value by negating the escape value in the default
6909        situation when PCRE_UCP is not set. When it *is* set, we substitute        situation when PCRE_UCP is not set. When it *is* set, we substitute
6910        Unicode property tests. */        Unicode property tests. Note that \b and \B do a one-character
6911          lookbehind. */
6912    
6913        else        else
6914          {          {
6915            if ((-c == ESC_b || -c == ESC_B) && cd->max_lookbehind == 0)
6916              cd->max_lookbehind = 1;
6917  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6918          if (-c >= ESC_DU && -c <= ESC_wu)          if (-c >= ESC_DU && -c <= ESC_wu)
6919            {            {
# Line 7147  for (;;) Line 7221  for (;;)
7221          *ptrptr = ptr;          *ptrptr = ptr;
7222          return FALSE;          return FALSE;
7223          }          }
7224        else { PUT(reverse_count, 0, fixed_length); }        else
7225            {
7226            if (fixed_length > cd->max_lookbehind)
7227              cd->max_lookbehind = fixed_length;
7228            PUT(reverse_count, 0, fixed_length);
7229            }
7230        }        }
7231      }      }
7232    
# Line 7279  and the highest back reference was great Line 7358  and the highest back reference was great
7358  However, by keeping a bitmap of the first 31 back references, we can catch some  However, by keeping a bitmap of the first 31 back references, we can catch some
7359  of the more common cases more precisely.  of the more common cases more precisely.
7360    
7361    ... A second exception is when the .* appears inside an atomic group, because
7362    this prevents the number of characters it matches from being adjusted.
7363    
7364  Arguments:  Arguments:
7365    code           points to start of expression (the bracket)    code           points to start of expression (the bracket)
7366    bracket_map    a bitmap of which brackets we are inside while testing; this    bracket_map    a bitmap of which brackets we are inside while testing; this
7367                    handles up to substring 31; after that we just have to take                    handles up to substring 31; after that we just have to take
7368                    the less precise approach                    the less precise approach
7369    backref_map    the back reference bitmap    cd             points to the compile data block
7370      atomcount      atomic group level
7371    
7372  Returns:     TRUE or FALSE  Returns:     TRUE or FALSE
7373  */  */
7374    
7375  static BOOL  static BOOL
7376  is_anchored(register const pcre_uchar *code, unsigned int bracket_map,  is_anchored(register const pcre_uchar *code, unsigned int bracket_map,
7377    unsigned int backref_map)    compile_data *cd, int atomcount)
7378  {  {
7379  do {  do {
7380     const pcre_uchar *scode = first_significant_code(     const pcre_uchar *scode = first_significant_code(
# Line 7303  do { Line 7386  do {
7386     if (op == OP_BRA  || op == OP_BRAPOS ||     if (op == OP_BRA  || op == OP_BRAPOS ||
7387         op == OP_SBRA || op == OP_SBRAPOS)         op == OP_SBRA || op == OP_SBRAPOS)
7388       {       {
7389       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
7390       }       }
7391    
7392     /* Capturing brackets */     /* Capturing brackets */
# Line 7313  do { Line 7396  do {
7396       {       {
7397       int n = GET2(scode, 1+LINK_SIZE);       int n = GET2(scode, 1+LINK_SIZE);
7398       int new_map = bracket_map | ((n < 32)? (1 << n) : 1);       int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
7399       if (!is_anchored(scode, new_map, backref_map)) return FALSE;       if (!is_anchored(scode, new_map, cd, atomcount)) return FALSE;
7400         }
7401    
7402       /* Positive forward assertions and conditions */
7403    
7404       else if (op == OP_ASSERT || op == OP_COND)
7405         {
7406         if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
7407       }       }
7408    
7409     /* Other brackets */     /* Atomic groups */
7410    
7411     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC ||     else if (op == OP_ONCE || op == OP_ONCE_NC)
             op == OP_COND)  
7412       {       {
7413       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, cd, atomcount + 1))
7414           return FALSE;
7415       }       }
7416    
7417     /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and     /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
7418     it isn't in brackets that are or may be referenced. */     it isn't in brackets that are or may be referenced or inside an atomic
7419       group. */
7420    
7421     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
7422               op == OP_TYPEPOSSTAR))               op == OP_TYPEPOSSTAR))
7423       {       {
7424       if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0)       if (scode[1] != OP_ALLANY || (bracket_map & cd->backref_map) != 0 ||
7425             atomcount > 0 || cd->had_pruneorskip)
7426         return FALSE;         return FALSE;
7427       }       }
7428    
7429     /* Check for explicit anchoring */     /* Check for explicit anchoring */
7430    
7431     else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE;     else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE;
7432    
7433     code += GET(code, 1);     code += GET(code, 1);
7434     }     }
7435  while (*code == OP_ALT);   /* Loop for each alternative */  while (*code == OP_ALT);   /* Loop for each alternative */
# Line 7354  return TRUE; Line 7447  return TRUE;
7447  matching and for non-DOTALL patterns that start with .* (which must start at  matching and for non-DOTALL patterns that start with .* (which must start at
7448  the beginning or after \n). As in the case of is_anchored() (see above), we  the beginning or after \n). As in the case of is_anchored() (see above), we
7449  have to take account of back references to capturing brackets that contain .*  have to take account of back references to capturing brackets that contain .*
7450  because in that case we can't make the assumption.  because in that case we can't make the assumption. Also, the appearance of .*
7451    inside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not
7452    count, because once again the assumption no longer holds.
7453    
7454  Arguments:  Arguments:
7455    code           points to start of expression (the bracket)    code           points to start of expression (the bracket)
7456    bracket_map    a bitmap of which brackets we are inside while testing; this    bracket_map    a bitmap of which brackets we are inside while testing; this
7457                    handles up to substring 31; after that we just have to take                    handles up to substring 31; after that we just have to take
7458                    the less precise approach                    the less precise approach
7459    backref_map    the back reference bitmap    cd             points to the compile data
7460      atomcount      atomic group level
7461    
7462  Returns:         TRUE or FALSE  Returns:         TRUE or FALSE
7463  */  */
7464    
7465  static BOOL  static BOOL
7466  is_startline(const pcre_uchar *code, unsigned int bracket_map,  is_startline(const pcre_uchar *code, unsigned int bracket_map,
7467    unsigned int backref_map)    compile_data *cd, int atomcount)
7468  {  {
7469  do {  do {
7470     const pcre_uchar *scode = first_significant_code(     const pcre_uchar *scode = first_significant_code(
# Line 7394  do { Line 7490  do {
7490         return FALSE;         return FALSE;
7491    
7492         default:     /* Assertion */         default:     /* Assertion */
7493         if (!is_startline(scode, bracket_map, backref_map)) return FALSE;         if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
7494         do scode += GET(scode, 1); while (*scode == OP_ALT);         do scode += GET(scode, 1); while (*scode == OP_ALT);
7495         scode += 1 + LINK_SIZE;         scode += 1 + LINK_SIZE;
7496         break;         break;
# Line 7408  do { Line 7504  do {
7504     if (op == OP_BRA  || op == OP_BRAPOS ||     if (op == OP_BRA  || op == OP_BRAPOS ||
7505         op == OP_SBRA || op == OP_SBRAPOS)         op == OP_SBRA || op == OP_SBRAPOS)
7506       {       {
7507       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
7508       }       }
7509    
7510     /* Capturing brackets */     /* Capturing brackets */
# Line 7418  do { Line 7514  do {
7514       {       {
7515       int n = GET2(scode, 1+LINK_SIZE);       int n = GET2(scode, 1+LINK_SIZE);
7516       int new_map = bracket_map | ((n < 32)? (1 << n) : 1);       int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
7517       if (!is_startline(scode, new_map, backref_map)) return FALSE;       if (!is_startline(scode, new_map, cd, atomcount)) return FALSE;
7518       }       }
7519    
7520     /* Other brackets */     /* Positive forward assertions */
7521    
7522     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC)     else if (op == OP_ASSERT)
7523       {       {
7524       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
7525       }       }
7526    
7527       /* Atomic brackets */
7528    
7529     /* .* means "start at start or after \n" if it isn't in brackets that     else if (op == OP_ONCE || op == OP_ONCE_NC)
7530     may be referenced. */       {
7531         if (!is_startline(scode, bracket_map, cd, atomcount + 1)) return FALSE;
7532         }
7533    
7534       /* .* means "start at start or after \n" if it isn't in atomic brackets or
7535       brackets that may be referenced, as long as the pattern does not contain
7536       *PRUNE or *SKIP, because these break the feature. Consider, for example,
7537       /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the
7538       start of a line. */
7539    
7540     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
7541       {       {
7542       if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;       if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 ||
7543             atomcount > 0 || cd->had_pruneorskip)
7544           return FALSE;
7545       }       }
7546    
7547     /* Check for explicit circumflex */     /* Check for explicit circumflex; anything else gives a FALSE result. Note
7548       that atomic brackets OP_ONCE and OP_ONCE_NC do not reach this test: they
7549       are handled above by recursing with an incremented atomcount, so that any
7550       .* inside them gives FALSE, because the number of characters matched by .* cannot be adjusted inside them. */
7551    
7552     else if (op != OP_CIRC && op != OP_CIRCM) return FALSE;     else if (op != OP_CIRC && op != OP_CIRCM) return FALSE;
7553    
# Line 7817  cd->start_pattern = (const pcre_uchar *) Line 7928  cd->start_pattern = (const pcre_uchar *)
7928  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
7929  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7930  cd->assert_depth = 0;  cd->assert_depth = 0;
7931    cd->max_lookbehind = 0;
7932  cd->external_options = options;  cd->external_options = options;
7933  cd->external_flags = 0;  cd->external_flags = 0;
7934  cd->open_caps = NULL;  cd->open_caps = NULL;
# Line 7867  re->magic_number = MAGIC_NUMBER; Line 7979  re->magic_number = MAGIC_NUMBER;
7979  re->size = (int)size;  re->size = (int)size;
7980  re->options = cd->external_options;  re->options = cd->external_options;
7981  re->flags = cd->external_flags;  re->flags = cd->external_flags;
 re->dummy1 = 0;  
7982  re->first_char = 0;  re->first_char = 0;
7983  re->req_char = 0;  re->req_char = 0;
7984  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
# Line 7887  field; this time it's used for rememberi Line 7998  field; this time it's used for rememberi
7998  cd->final_bracount = cd->bracount;  /* Save for checking forward references */  cd->final_bracount = cd->bracount;  /* Save for checking forward references */
7999  cd->assert_depth = 0;  cd->assert_depth = 0;
8000  cd->bracount = 0;  cd->bracount = 0;
8001    cd->max_lookbehind = 0;
8002  cd->names_found = 0;  cd->names_found = 0;
8003  cd->name_table = (pcre_uchar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
8004  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
# Line 7894  cd->start_code = codestart; Line 8006  cd->start_code = codestart;
8006  cd->hwm = (pcre_uchar *)(cd->start_workspace);  cd->hwm = (pcre_uchar *)(cd->start_workspace);
8007  cd->req_varyopt = 0;  cd->req_varyopt = 0;
8008  cd->had_accept = FALSE;  cd->had_accept = FALSE;
8009    cd->had_pruneorskip = FALSE;
8010  cd->check_lookbehind = FALSE;  cd->check_lookbehind = FALSE;
8011  cd->open_caps = NULL;  cd->open_caps = NULL;
8012    
# Line 7908  code = (pcre_uchar *)codestart; Line 8021  code = (pcre_uchar *)codestart;
8021    &firstchar, &reqchar, NULL, cd, NULL);    &firstchar, &reqchar, NULL, cd, NULL);
8022  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
8023  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
8024    re->max_lookbehind = cd->max_lookbehind;
8025  re->flags = cd->external_flags | PCRE_MODE;  re->flags = cd->external_flags | PCRE_MODE;
8026    
8027  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
# Line 7995  if (cd->check_lookbehind) Line 8109  if (cd->check_lookbehind)
8109                      (fixed_length == -4)? ERR70 : ERR25;                      (fixed_length == -4)? ERR70 : ERR25;
8110          break;          break;
8111          }          }
8112          if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length;
8113        PUT(cc, 1, fixed_length);        PUT(cc, 1, fixed_length);
8114        }        }
8115      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
# Line 8015  if (errorcode != 0) Line 8130  if (errorcode != 0)
8130    }    }
8131    
8132  /* If the anchored option was not passed, set the flag if we can determine that  /* If the anchored option was not passed, set the flag if we can determine that
8133  the pattern is anchored by virtue of ^ characters or \A or anything else (such  the pattern is anchored by virtue of ^ characters or \A or anything else, such
8134  as starting with .* when DOTALL is set).  as starting with non-atomic .* when DOTALL is set and there are no occurrences
8135    of *PRUNE or *SKIP.
8136    
8137  Otherwise, if we know what the first byte has to be, save it, because that  Otherwise, if we know what the first byte has to be, save it, because that
8138  speeds up unanchored matches no end. If not, see if we can set the  speeds up unanchored matches no end. If not, see if we can set the
8139  PCRE_STARTLINE flag. This is helpful for multiline matches when all branches  PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
8140  start with ^. and also when all branches start with .* for non-DOTALL matches.  start with ^, and also when all branches start with non-atomic .* for
8141  */  non-DOTALL matches when *PRUNE and *SKIP are not present. */
8142    
8143  if ((re->options & PCRE_ANCHORED) == 0)  if ((re->options & PCRE_ANCHORED) == 0)
8144    {    {
8145    if (is_anchored(codestart, 0, cd->backref_map))    if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED;
     re->options |= PCRE_ANCHORED;  
8146    else    else
8147      {      {
8148      if (firstchar < 0)      if (firstchar < 0)
# Line 8064  if ((re->options & PCRE_ANCHORED) == 0) Line 8179  if ((re->options & PCRE_ANCHORED) == 0)
8179    
8180        re->flags |= PCRE_FIRSTSET;        re->flags |= PCRE_FIRSTSET;
8181        }        }
8182      else if (is_startline(codestart, 0, cd->backref_map))  
8183        re->flags |= PCRE_STARTLINE;      else if (is_startline(codestart, 0, cd, 0)) re->flags |= PCRE_STARTLINE;
8184      }      }
8185    }    }
8186    
# Line 8134  if ((re->flags & PCRE_REQCHSET) != 0) Line 8249  if ((re->flags & PCRE_REQCHSET) != 0)
8249    }    }
8250    
8251  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
8252  pcre_printint(re, stdout, TRUE);  pcre_printint((pcre *)re, stdout, TRUE);
8253  #else  #else
8254  pcre16_printint(re, stdout, TRUE);  pcre16_printint((pcre *)re, stdout, TRUE);
8255  #endif  #endif
8256    
8257  /* This check is done here in the debugging case so that the code that  /* This check is done here in the debugging case so that the code that

Legend:
Removed from v.903  
changed lines
  Added in v.1033

  ViewVC Help
Powered by ViewVC 1.1.5