/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 836 by ph10, Wed Dec 28 17:16:11 2011 UTC | revision 964 by ph10, Fri May 4 13:03:39 2012 UTC
# Line 55  supporting internal functions that are n Line 55  supporting internal functions that are n
55    
56  /* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which  /* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
57  is also used by pcretest. PCRE_DEBUG is not defined when building a production  is also used by pcretest. PCRE_DEBUG is not defined when building a production
58  library. We do not need to select pcre16_printint.c specially, because the  library. We do not need to select pcre16_printint.c specially, because the
59  COMPILE_PCREx macro will already be appropriately set. */  COMPILE_PCREx macro will already be appropriately set. */
60    
61  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 438  static const char error_texts[] = Line 438  static const char error_texts[] =
438    /* 30 */    /* 30 */
439    "unknown POSIX class name\0"    "unknown POSIX class name\0"
440    "POSIX collating elements are not supported\0"    "POSIX collating elements are not supported\0"
441    "this version of PCRE is not compiled with PCRE_UTF8 support\0"    "this version of PCRE is compiled without UTF support\0"
442    "spare error\0"  /** DEAD **/    "spare error\0"  /** DEAD **/
443    "character value in \\x{...} sequence is too large\0"    "character value in \\x{...} sequence is too large\0"
444    /* 35 */    /* 35 */
# Line 461  static const char error_texts[] = Line 461  static const char error_texts[] =
461    "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"    "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
462    /* 50 */    /* 50 */
463    "repeated subpattern is too long\0"    /** DEAD **/    "repeated subpattern is too long\0"    /** DEAD **/
464    "octal value is greater than \\377 (not in UTF-8 mode)\0"    "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
465    "internal error: overran compiling workspace\0"    "internal error: overran compiling workspace\0"
466    "internal error: previously-checked referenced subpattern not found\0"    "internal error: previously-checked referenced subpattern not found\0"
467    "DEFINE group contains more than one branch\0"    "DEFINE group contains more than one branch\0"
# Line 480  static const char error_texts[] = Line 480  static const char error_texts[] =
480    /* 65 */    /* 65 */
481    "different names for subpatterns of the same number are not allowed\0"    "different names for subpatterns of the same number are not allowed\0"
482    "(*MARK) must have an argument\0"    "(*MARK) must have an argument\0"
483    "this version of PCRE is not compiled with PCRE_UCP support\0"    "this version of PCRE is not compiled with Unicode property support\0"
484    "\\c must be followed by an ASCII character\0"    "\\c must be followed by an ASCII character\0"
485    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
486    /* 70 */    /* 70 */
487    "internal error: unknown opcode in find_fixedlength()\0"    "internal error: unknown opcode in find_fixedlength()\0"
488    "\\N is not supported in a class\0"    "\\N is not supported in a class\0"
489    "too many forward references\0"    "too many forward references\0"
490    "disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff)\0"    "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
491      "invalid UTF-16 string\0"
492      /* 75 */
493      "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
494    ;    ;
495    
496  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 990  else Line 993  else
993      /* \0 always starts an octal number, but we may drop through to here with a      /* \0 always starts an octal number, but we may drop through to here with a
994      larger first octal digit. The original code used just to take the least      larger first octal digit. The original code used just to take the least
995      significant 8 bits of octal numbers (I think this is what early Perls used      significant 8 bits of octal numbers (I think this is what early Perls used
996      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more      to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode,
997      than 3 octal digits. */      but no more than 3 octal digits. */
998    
999      case CHAR_0:      case CHAR_0:
1000      c -= CHAR_0;      c -= CHAR_0;
1001      while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)      while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
1002          c = c * 8 + *(++ptr) - CHAR_0;          c = c * 8 + *(++ptr) - CHAR_0;
1003    #ifdef COMPILE_PCRE8
1004      if (!utf && c > 0xff) *errorcodeptr = ERR51;      if (!utf && c > 0xff) *errorcodeptr = ERR51;
1005    #endif
1006      break;      break;
1007    
1008      /* \x is complicated. \x{ddd} is a character number which can be greater      /* \x is complicated. \x{ddd} is a character number which can be greater
# Line 1708  for (;;) Line 1713  for (;;)
1713    int d;    int d;
1714    pcre_uchar *ce, *cs;    pcre_uchar *ce, *cs;
1715    register int op = *cc;    register int op = *cc;
1716    
1717    switch (op)    switch (op)
1718      {      {
1719      /* We only need to continue for OP_CBRA (normal capturing bracket) and      /* We only need to continue for OP_CBRA (normal capturing bracket) and
# Line 1769  for (;;) Line 1774  for (;;)
1774      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1775      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
1776      cc += PRIV(OP_lengths)[*cc];      cc += PRIV(OP_lengths)[*cc];
1777      break;      break;
1778    
1779      /* Skip over things that don't match chars */      /* Skip over things that don't match chars */
1780    
# Line 2222  for (;;) Line 2227  for (;;)
2227        {        {
2228        case OP_CHAR:        case OP_CHAR:
2229        case OP_CHARI:        case OP_CHARI:
2230          case OP_NOT:
2231          case OP_NOTI:
2232        case OP_EXACT:        case OP_EXACT:
2233        case OP_EXACTI:        case OP_EXACTI:
2234          case OP_NOTEXACT:
2235          case OP_NOTEXACTI:
2236        case OP_UPTO:        case OP_UPTO:
2237        case OP_UPTOI:        case OP_UPTOI:
2238          case OP_NOTUPTO:
2239          case OP_NOTUPTOI:
2240        case OP_MINUPTO:        case OP_MINUPTO:
2241        case OP_MINUPTOI:        case OP_MINUPTOI:
2242          case OP_NOTMINUPTO:
2243          case OP_NOTMINUPTOI:
2244        case OP_POSUPTO:        case OP_POSUPTO:
2245        case OP_POSUPTOI:        case OP_POSUPTOI:
2246          case OP_NOTPOSUPTO:
2247          case OP_NOTPOSUPTOI:
2248        case OP_STAR:        case OP_STAR:
2249        case OP_STARI:        case OP_STARI:
2250          case OP_NOTSTAR:
2251          case OP_NOTSTARI:
2252        case OP_MINSTAR:        case OP_MINSTAR:
2253        case OP_MINSTARI:        case OP_MINSTARI:
2254          case OP_NOTMINSTAR:
2255          case OP_NOTMINSTARI:
2256        case OP_POSSTAR:        case OP_POSSTAR:
2257        case OP_POSSTARI:        case OP_POSSTARI:
2258          case OP_NOTPOSSTAR:
2259          case OP_NOTPOSSTARI:
2260        case OP_PLUS:        case OP_PLUS:
2261        case OP_PLUSI:        case OP_PLUSI:
2262          case OP_NOTPLUS:
2263          case OP_NOTPLUSI:
2264        case OP_MINPLUS:        case OP_MINPLUS:
2265        case OP_MINPLUSI:        case OP_MINPLUSI:
2266          case OP_NOTMINPLUS:
2267          case OP_NOTMINPLUSI:
2268        case OP_POSPLUS:        case OP_POSPLUS:
2269        case OP_POSPLUSI:        case OP_POSPLUSI:
2270          case OP_NOTPOSPLUS:
2271          case OP_NOTPOSPLUSI:
2272        case OP_QUERY:        case OP_QUERY:
2273        case OP_QUERYI:        case OP_QUERYI:
2274          case OP_NOTQUERY:
2275          case OP_NOTQUERYI:
2276        case OP_MINQUERY:        case OP_MINQUERY:
2277        case OP_MINQUERYI:        case OP_MINQUERYI:
2278          case OP_NOTMINQUERY:
2279          case OP_NOTMINQUERYI:
2280        case OP_POSQUERY:        case OP_POSQUERY:
2281        case OP_POSQUERYI:        case OP_POSQUERYI:
2282          case OP_NOTPOSQUERY:
2283          case OP_NOTPOSQUERYI:
2284        if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);        if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
2285        break;        break;
2286        }        }
# Line 3064  if (next >= 0) switch(op_code) Line 3097  if (next >= 0) switch(op_code)
3097      }      }
3098    else    else
3099  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3100    return (c != TABLE_GET(next, cd->fcc, next));  /* Non-UTF-8 mode */    return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */
   
   /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These  
   opcodes are not used for multi-byte characters, because they are coded using  
   an XCLASS instead. */  
3101    
3102    case OP_NOT:    case OP_NOT:
3103    return (c = *previous) == next;  #ifdef SUPPORT_UTF
3104      GETCHARTEST(c, previous);
3105    #else
3106      c = *previous;
3107    #endif
3108      return c == next;
3109    
3110    case OP_NOTI:    case OP_NOTI:
3111    if ((c = *previous) == next) return TRUE;  #ifdef SUPPORT_UTF
3112      GETCHARTEST(c, previous);
3113    #else
3114      c = *previous;
3115    #endif
3116      if (c == next) return TRUE;
3117  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3118    if (utf)    if (utf)
3119      {      {
3120      unsigned int othercase;      unsigned int othercase;
3121      if (next < 128) othercase = cd->fcc[next]; else      if (next < 128) othercase = cd->fcc[next]; else
3122  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3123      othercase = UCD_OTHERCASE(next);      othercase = UCD_OTHERCASE((unsigned int)next);
3124  #else  #else
3125      othercase = NOTACHAR;      othercase = NOTACHAR;
3126  #endif  #endif
# Line 3089  if (next >= 0) switch(op_code) Line 3128  if (next >= 0) switch(op_code)
3128      }      }
3129    else    else
3130  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3131    return (c == TABLE_GET(next, cd->fcc, next));  /* Non-UTF-8 mode */    return (c == TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */
3132    
3133    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
3134    When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */    When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
3135    
3136    case OP_DIGIT:    case OP_DIGIT:
3137    return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;    return next > 255 || (cd->ctypes[next] & ctype_digit) == 0;
3138    
3139    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
3140    return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0;
3141    
3142    case OP_WHITESPACE:    case OP_WHITESPACE:
3143    return next > 127 || (cd->ctypes[next] & ctype_space) == 0;    return next > 255 || (cd->ctypes[next] & ctype_space) == 0;
3144    
3145    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
3146    return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_space) != 0;
3147    
3148    case OP_WORDCHAR:    case OP_WORDCHAR:
3149    return next > 127 || (cd->ctypes[next] & ctype_word) == 0;    return next > 255 || (cd->ctypes[next] & ctype_word) == 0;
3150    
3151    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
3152    return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_word) != 0;
3153    
3154    case OP_HSPACE:    case OP_HSPACE:
3155    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
# Line 3188  switch(op_code) Line 3227  switch(op_code)
3227    switch(-next)    switch(-next)
3228      {      {
3229      case ESC_d:      case ESC_d:
3230      return c > 127 || (cd->ctypes[c] & ctype_digit) == 0;      return c > 255 || (cd->ctypes[c] & ctype_digit) == 0;
3231    
3232      case ESC_D:      case ESC_D:
3233      return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0;
3234    
3235      case ESC_s:      case ESC_s:
3236      return c > 127 || (cd->ctypes[c] & ctype_space) == 0;      return c > 255 || (cd->ctypes[c] & ctype_space) == 0;
3237    
3238      case ESC_S:      case ESC_S:
3239      return c <= 127 && (cd->ctypes[c] & ctype_space) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_space) != 0;
3240    
3241      case ESC_w:      case ESC_w:
3242      return c > 127 || (cd->ctypes[c] & ctype_word) == 0;      return c > 255 || (cd->ctypes[c] & ctype_word) == 0;
3243    
3244      case ESC_W:      case ESC_W:
3245      return c <= 127 && (cd->ctypes[c] & ctype_word) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_word) != 0;
3246    
3247      case ESC_h:      case ESC_h:
3248      case ESC_H:      case ESC_H:
# Line 3312  switch(op_code) Line 3351  switch(op_code)
3351    return next == -ESC_d;    return next == -ESC_d;
3352    
3353    case OP_WHITESPACE:    case OP_WHITESPACE:
3354    return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R;    return next == -ESC_S || next == -ESC_d || next == -ESC_w;
3355    
3356    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
3357    return next == -ESC_s || next == -ESC_h || next == -ESC_v;    return next == -ESC_s || next == -ESC_h || next == -ESC_v || next == -ESC_R;
3358    
3359    case OP_HSPACE:    case OP_HSPACE:
3360    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||
# Line 3526  for (;; ptr++) Line 3565  for (;; ptr++)
3565      *lengthptr += (int)(code - last_code);      *lengthptr += (int)(code - last_code);
3566      DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr,      DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr,
3567        (int)(code - last_code), c, c));        (int)(code - last_code), c, c));
3568    
3569      /* If "previous" is set and it is not at the start of the work space, move      /* If "previous" is set and it is not at the start of the work space, move
3570      it back to there, in order to avoid filling up the work space. Otherwise,      it back to there, in order to avoid filling up the work space. Otherwise,
3571      if "previous" is NULL, reset the current code pointer to the start. */      if "previous" is NULL, reset the current code pointer to the start. */
# Line 4479  for (;; ptr++) Line 4518  for (;; ptr++)
4518        LONE_SINGLE_CHARACTER:        LONE_SINGLE_CHARACTER:
4519    
4520        /* Only the value of 1 matters for class_single_char. */        /* Only the value of 1 matters for class_single_char. */
4521    
4522        if (class_single_char < 2) class_single_char++;        if (class_single_char < 2) class_single_char++;
4523    
4524        /* If class_charcount is 1, we saw precisely one character. As long as        /* If class_charcount is 1, we saw precisely one character. As long as
4525        there were no negated characters >= 128 and there was no use of \p or \P,        there was no use of \p or \P, in other words, no use of any XCLASS
4526        in other words, no use of any XCLASS features, we can optimize.        features, we can optimize.
   
       In UTF-8 mode, we can optimize the negative case only if there were no  
       characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR  
       operate on single-bytes characters only. This is an historical hangover.  
       Maybe one day we can tidy these opcodes to handle multi-byte characters.  
4527    
4528        The optimization throws away the bit map. We turn the item into a        The optimization throws away the bit map. We turn the item into a
4529        1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.        1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
4530        Note that OP_NOT[I] does not support multibyte characters. In the positive        In the positive case, it can cause firstchar to be set. Otherwise, there
4531        case, it can cause firstchar to be set. Otherwise, there can be no first        can be no first char if this item is first, whatever repeat count may
4532        char if this item is first, whatever repeat count may follow. In the case        follow. In the case of reqchar, save the previous value for reinstating. */
       of reqchar, save the previous value for reinstating. */  
4533    
 #ifdef SUPPORT_UTF  
       if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET  
         && (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))  
 #else  
4534        if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)        if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
 #endif  
4535          {          {
4536          ptr++;          ptr++;
4537          zeroreqchar = reqchar;          zeroreqchar = reqchar;
4538    
         /* The OP_NOT[I] opcodes work on single characters only. */  
   
4539          if (negate_class)          if (negate_class)
4540            {            {
4541            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
4542            zerofirstchar = firstchar;            zerofirstchar = firstchar;
4543            *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;            *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
4544            *code++ = c;  #ifdef SUPPORT_UTF
4545              if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
4546                code += PRIV(ord2utf)(c, code);
4547              else
4548    #endif
4549                *code++ = c;
4550            goto NOT_CHAR;            goto NOT_CHAR;
4551            }            }
4552    
# Line 4550  for (;; ptr++) Line 4582  for (;; ptr++)
4582  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4583  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
4584          /* In non 8 bit mode, we can get here even if we are not in UTF mode. */          /* In non 8 bit mode, we can get here even if we are not in UTF mode. */
4585          if (!utf)          if (!utf)
4586            *class_uchardata++ = c;            *class_uchardata++ = c;
4587          else          else
4588  #endif  #endif
# Line 4568  for (;; ptr++) Line 4600  for (;; ptr++)
4600  #endif  #endif
4601            {            {
4602            unsigned int othercase;            unsigned int othercase;
4603            if ((othercase = UCD_OTHERCASE(c)) != c)            if ((int)(othercase = UCD_OTHERCASE(c)) != c)
4604              {              {
4605              *class_uchardata++ = XCL_SINGLE;              *class_uchardata++ = XCL_SINGLE;
4606              class_uchardata += PRIV(ord2utf)(othercase, class_uchardata);              class_uchardata += PRIV(ord2utf)(othercase, class_uchardata);
# Line 4772  for (;; ptr++) Line 4804  for (;; ptr++)
4804    
4805      /* Now handle repetition for the different types of item. */      /* Now handle repetition for the different types of item. */
4806    
4807      /* If previous was a character match, abolish the item and generate a      /* If previous was a character or negated character match, abolish the item
4808      repeat item instead. If a char item has a minumum of more than one, ensure      and generate a repeat item instead. If a char item has a minimum of more
4809      that it is set in reqchar - it might not be if a sequence such as x{3} is      than one, ensure that it is set in reqchar - it might not be if a sequence
4810      the first thing in a branch because the x will have gone into firstchar      such as x{3} is the first thing in a branch because the x will have gone
4811      instead.  */      into firstchar instead.  */
4812    
4813      if (*previous == OP_CHAR || *previous == OP_CHARI)      if (*previous == OP_CHAR || *previous == OP_CHARI
4814        {          || *previous == OP_NOT || *previous == OP_NOTI)
4815        op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR;        {
4816          switch (*previous)
4817            {
4818            default: /* Make compiler happy. */
4819            case OP_CHAR:  op_type = OP_STAR - OP_STAR; break;
4820            case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
4821            case OP_NOT:   op_type = OP_NOTSTAR - OP_STAR; break;
4822            case OP_NOTI:  op_type = OP_NOTSTARI - OP_STAR; break;
4823            }
4824    
4825        /* Deal with UTF characters that take up more than one character. It's        /* Deal with UTF characters that take up more than one character. It's
4826        easier to write this out separately than try to macrify it. Use c to        easier to write this out separately than try to macrify it. Use c to
# Line 4803  for (;; ptr++) Line 4843  for (;; ptr++)
4843        with UTF disabled, or for a single character UTF character. */        with UTF disabled, or for a single character UTF character. */
4844          {          {
4845          c = code[-1];          c = code[-1];
4846          if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;          if (*previous <= OP_CHARI && repeat_min > 1)
4847              reqchar = c | req_caseopt | cd->req_varyopt;
4848          }          }
4849    
4850        /* If the repetition is unlimited, it pays to see if the next thing on        /* If the repetition is unlimited, it pays to see if the next thing on
# Line 4822  for (;; ptr++) Line 4863  for (;; ptr++)
4863        goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */        goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */
4864        }        }
4865    
     /* If previous was a single negated character ([^a] or similar), we use  
     one of the special opcodes, replacing it. The code is shared with single-  
     character repeats by setting opt_type to add a suitable offset into  
     repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI  
     are currently used only for single-byte chars. */  
   
     else if (*previous == OP_NOT || *previous == OP_NOTI)  
       {  
       op_type = ((*previous == OP_NOT)? OP_NOTSTAR : OP_NOTSTARI) - OP_STAR;  
       c = previous[1];  
       if (!possessive_quantifier &&  
           repeat_max < 0 &&  
           check_auto_possessive(previous, utf, ptr + 1, options, cd))  
         {  
         repeat_type = 0;    /* Force greedy */  
         possessive_quantifier = TRUE;  
         }  
       goto OUTPUT_SINGLE_REPEAT;  
       }  
   
4866      /* If previous was a character type match (\d or similar), abolish it and      /* If previous was a character type match (\d or similar), abolish it and
4867      create a suitable repeat item. The code is shared with single-character      create a suitable repeat item. The code is shared with single-character
4868      repeats by setting op_type to add a suitable offset into repeat_type. Note      repeats by setting op_type to add a suitable offset into repeat_type. Note
# Line 5572  for (;; ptr++) Line 5593  for (;; ptr++)
5593        ptr++;        ptr++;
5594        while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;        while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
5595        namelen = (int)(ptr - name);        namelen = (int)(ptr - name);
5596    
5597        /* It appears that Perl allows any characters whatsoever, other than        /* It appears that Perl allows any characters whatsoever, other than
5598        a closing parenthesis, to appear in arguments, so we no longer insist on        a closing parenthesis, to appear in arguments, so we no longer insist on
5599        letters, digits, and underscores. */        letters, digits, and underscores. */
# Line 5582  for (;; ptr++) Line 5603  for (;; ptr++)
5603          arg = ++ptr;          arg = ++ptr;
5604          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
5605          arglen = (int)(ptr - arg);          arglen = (int)(ptr - arg);
5606            if (arglen > (int)MAX_MARK)
5607              {
5608              *errorcodeptr = ERR75;
5609              goto FAILED;
5610              }
5611          }          }
5612    
5613        if (*ptr != CHAR_RIGHT_PARENTHESIS)        if (*ptr != CHAR_RIGHT_PARENTHESIS)
# Line 6833  for (;; ptr++) Line 6859  for (;; ptr++)
6859        /* For the rest (including \X when Unicode properties are supported), we        /* For the rest (including \X when Unicode properties are supported), we
6860        can obtain the OP value by negating the escape value in the default        can obtain the OP value by negating the escape value in the default
6861        situation when PCRE_UCP is not set. When it *is* set, we substitute        situation when PCRE_UCP is not set. When it *is* set, we substitute
6862        Unicode property tests. */        Unicode property tests. Note that \b and \B do a one-character
6863          lookbehind. */
6864    
6865        else        else
6866          {          {
6867            if ((-c == ESC_b || -c == ESC_B) && cd->max_lookbehind == 0)
6868              cd->max_lookbehind = 1;
6869  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6870          if (-c >= ESC_DU && -c <= ESC_wu)          if (-c >= ESC_DU && -c <= ESC_wu)
6871            {            {
# Line 7144  for (;;) Line 7173  for (;;)
7173          *ptrptr = ptr;          *ptrptr = ptr;
7174          return FALSE;          return FALSE;
7175          }          }
7176        else { PUT(reverse_count, 0, fixed_length); }        else
7177            {
7178            if (fixed_length > cd->max_lookbehind)
7179              cd->max_lookbehind = fixed_length;
7180            PUT(reverse_count, 0, fixed_length);
7181            }
7182        }        }
7183      }      }
7184    
# Line 7560  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTIO Line 7594  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTIO
7594  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
7595    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
7596  #else  #else
7597  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
7598  pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,  pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
7599    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
7600  #endif  #endif
# Line 7578  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTIO Line 7612  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTIO
7612  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
7613    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
7614  #else  #else
7615  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
7616  pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,  pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
7617    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
7618  #endif  #endif
7619  {  {
7620  real_pcre *re;  REAL_PCRE *re;
7621  int length = 1;  /* For final END opcode */  int length = 1;  /* For final END opcode */
7622  pcre_int32 firstchar, reqchar;  pcre_int32 firstchar, reqchar;
7623  int newline;  int newline;
# Line 7706  not used here. */ Line 7740  not used here. */
7740  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7741       (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)       (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
7742    {    {
7743    #ifdef COMPILE_PCRE8
7744    errorcode = ERR44;    errorcode = ERR44;
7745    #else
7746      errorcode = ERR74;
7747    #endif
7748    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
7749    }    }
7750  #else  #else
# Line 7810  cd->start_pattern = (const pcre_uchar *) Line 7848  cd->start_pattern = (const pcre_uchar *)
7848  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
7849  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7850  cd->assert_depth = 0;  cd->assert_depth = 0;
7851    cd->max_lookbehind = 0;
7852  cd->external_options = options;  cd->external_options = options;
7853  cd->external_flags = 0;  cd->external_flags = 0;
7854  cd->open_caps = NULL;  cd->open_caps = NULL;
# Line 7841  externally provided function. Integer ov Line 7880  externally provided function. Integer ov
7880  because nowadays we limit the maximum value of cd->names_found and  because nowadays we limit the maximum value of cd->names_found and
7881  cd->name_entry_size. */  cd->name_entry_size. */
7882    
7883  size = sizeof(real_pcre) + (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);  size = sizeof(REAL_PCRE) + (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);
7884  re = (real_pcre *)(PUBL(malloc))(size);  re = (REAL_PCRE *)(PUBL(malloc))(size);
7885    
7886  if (re == NULL)  if (re == NULL)
7887    {    {
# Line 7860  re->magic_number = MAGIC_NUMBER; Line 7899  re->magic_number = MAGIC_NUMBER;
7899  re->size = (int)size;  re->size = (int)size;
7900  re->options = cd->external_options;  re->options = cd->external_options;
7901  re->flags = cd->external_flags;  re->flags = cd->external_flags;
 re->dummy1 = 0;  
7902  re->first_char = 0;  re->first_char = 0;
7903  re->req_char = 0;  re->req_char = 0;
7904  re->name_table_offset = sizeof(real_pcre) / sizeof(pcre_uchar);  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
7905  re->name_entry_size = cd->name_entry_size;  re->name_entry_size = cd->name_entry_size;
7906  re->name_count = cd->names_found;  re->name_count = cd->names_found;
7907  re->ref_count = 0;  re->ref_count = 0;
# Line 7880  field; this time it's used for rememberi Line 7918  field; this time it's used for rememberi
7918  cd->final_bracount = cd->bracount;  /* Save for checking forward references */  cd->final_bracount = cd->bracount;  /* Save for checking forward references */
7919  cd->assert_depth = 0;  cd->assert_depth = 0;
7920  cd->bracount = 0;  cd->bracount = 0;
7921    cd->max_lookbehind = 0;
7922  cd->names_found = 0;  cd->names_found = 0;
7923  cd->name_table = (pcre_uchar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
7924  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
# Line 7901  code = (pcre_uchar *)codestart; Line 7940  code = (pcre_uchar *)codestart;
7940    &firstchar, &reqchar, NULL, cd, NULL);    &firstchar, &reqchar, NULL, cd, NULL);
7941  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
7942  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
7943    re->max_lookbehind = cd->max_lookbehind;
7944  re->flags = cd->external_flags | PCRE_MODE;  re->flags = cd->external_flags | PCRE_MODE;
7945    
7946  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
# Line 7988  if (cd->check_lookbehind) Line 8028  if (cd->check_lookbehind)
8028                      (fixed_length == -4)? ERR70 : ERR25;                      (fixed_length == -4)? ERR70 : ERR25;
8029          break;          break;
8030          }          }
8031          if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length;
8032        PUT(cc, 1, fixed_length);        PUT(cc, 1, fixed_length);
8033        }        }
8034      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
# Line 8127  if ((re->flags & PCRE_REQCHSET) != 0) Line 8168  if ((re->flags & PCRE_REQCHSET) != 0)
8168    }    }
8169    
8170  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
8171  pcre_printint(re, stdout, TRUE);  pcre_printint((pcre *)re, stdout, TRUE);
8172  #else  #else
8173  pcre16_printint(re, stdout, TRUE);  pcre16_printint((pcre *)re, stdout, TRUE);
8174  #endif  #endif
8175    
8176  /* This check is done here in the debugging case so that the code that  /* This check is done here in the debugging case so that the code that
# Line 8145  if (code - codestart > length) Line 8186  if (code - codestart > length)
8186    }    }
8187  #endif   /* PCRE_DEBUG */  #endif   /* PCRE_DEBUG */
8188    
8189    #ifdef COMPILE_PCRE8
8190  return (pcre *)re;  return (pcre *)re;
8191    #else
8192    return (pcre16 *)re;
8193    #endif
8194  }  }
8195    
8196  /* End of pcre_compile.c */  /* End of pcre_compile.c */

Legend:
Removed from v.836  
changed lines
  Added in v.964

  ViewVC Help
Powered by ViewVC 1.1.5