/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 701 by ph10, Tue Sep 20 11:30:56 2011 UTC revision 744 by zherczeg, Sun Nov 13 16:31:38 2011 UTC
# Line 676  else Line 676  else
676    
677      case CHAR_l:      case CHAR_l:
678      case CHAR_L:      case CHAR_L:
679        *errorcodeptr = ERR37;
680        break;
681    
682      case CHAR_u:      case CHAR_u:
683        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
684          {
685          /* In JavaScript, \u must be followed by exactly four hexadecimal
686          digits. Otherwise it is treated as a literal lowercase letter u. */
687          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
688               && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
689            {
690            int i;
691            c = 0;
692            for (i = 0; i < 4; ++i)
693              {
694              register int cc = *(++ptr);
695    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
696              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
697              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
698    #else           /* EBCDIC coding */
699              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
700              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
701    #endif
702              }
703            }
704          }
705        else
706          *errorcodeptr = ERR37;
707        break;
708    
709      case CHAR_U:      case CHAR_U:
710      *errorcodeptr = ERR37;      /* In JavaScript, \U is an uppercase U letter. */
711        if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
712      break;      break;
713    
714      /* In a character class, \g is just a literal "g". Outside a character      /* In a character class, \g is just a literal "g". Outside a character
# Line 828  else Line 858  else
858      treated as a data character. */      treated as a data character. */
859    
860      case CHAR_x:      case CHAR_x:
861        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
862          {
863          /* In JavaScript, \x must be followed by exactly two hexadecimal
864          digits. Otherwise it is treated as a literal lowercase letter x. */
865          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
866            {
867            int i;
868            c = 0;
869            for (i = 0; i < 2; ++i)
870              {
871              register int cc = *(++ptr);
872    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
873              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
874              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
875    #else           /* EBCDIC coding */
876              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
877              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
878    #endif
879              }
880            }
881          break;
882          }
883    
884      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
885        {        {
886        const uschar *pt = ptr + 2;        const uschar *pt = ptr + 2;
# Line 1506  for (;;) Line 1559  for (;;)
1559      case OP_CBRA:      case OP_CBRA:
1560      case OP_BRA:      case OP_BRA:
1561      case OP_ONCE:      case OP_ONCE:
1562        case OP_ONCE_NC:
1563      case OP_COND:      case OP_COND:
1564      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);
1565      if (d < 0) return d;      if (d < 0) return d;
# Line 1761  for (;;) Line 1815  for (;;)
1815        break;        break;
1816    
1817        case OP_THEN_ARG:        case OP_THEN_ARG:
1818        code += code[1+LINK_SIZE];        code += code[1];
1819        break;        break;
1820        }        }
1821    
# Line 1880  for (;;) Line 1934  for (;;)
1934        break;        break;
1935    
1936        case OP_THEN_ARG:        case OP_THEN_ARG:
1937        code += code[1+LINK_SIZE];        code += code[1];
1938        break;        break;
1939        }        }
1940    
# Line 2045  for (code = first_significant_code(code Line 2099  for (code = first_significant_code(code
2099    
2100    if (c == OP_BRA  || c == OP_BRAPOS ||    if (c == OP_BRA  || c == OP_BRAPOS ||
2101        c == OP_CBRA || c == OP_CBRAPOS ||        c == OP_CBRA || c == OP_CBRAPOS ||
2102        c == OP_ONCE || c == OP_COND)        c == OP_ONCE || c == OP_ONCE_NC ||
2103          c == OP_COND)
2104      {      {
2105      BOOL empty_branch;      BOOL empty_branch;
2106      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
# Line 2217  for (code = first_significant_code(code Line 2272  for (code = first_significant_code(code
2272      break;      break;
2273    
2274      case OP_THEN_ARG:      case OP_THEN_ARG:
2275      code += code[1+LINK_SIZE];      code += code[1];
2276      break;      break;
2277    
2278      /* None of the remaining opcodes are required to match a character. */      /* None of the remaining opcodes are required to match a character. */
# Line 3142  for (;; ptr++) Line 3197  for (;; ptr++)
3197    int subfirstbyte;    int subfirstbyte;
3198    int terminator;    int terminator;
3199    int mclength;    int mclength;
3200      int tempbracount;
3201    uschar mcbuffer[8];    uschar mcbuffer[8];
3202    
3203    /* Get next byte in the pattern */    /* Get next byte in the pattern */
# Line 4840  for (;; ptr++) Line 4896  for (;; ptr++)
4896          uschar *ketcode = code - 1 - LINK_SIZE;          uschar *ketcode = code - 1 - LINK_SIZE;
4897          uschar *bracode = ketcode - GET(ketcode, 1);          uschar *bracode = ketcode - GET(ketcode, 1);
4898    
4899          if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;          if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
4900          if (*bracode == OP_ONCE)              possessive_quantifier) *bracode = OP_BRA;
4901    
4902            if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
4903            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
4904          else          else
4905            {            {
# Line 5045  for (;; ptr++) Line 5103  for (;; ptr++)
5103                PUT2INC(code, 0, oc->number);                PUT2INC(code, 0, oc->number);
5104                }                }
5105              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;              *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
5106    
5107              /* Do not set firstbyte after *ACCEPT */              /* Do not set firstbyte after *ACCEPT */
5108              if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;              if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
5109              }              }
# Line 5060  for (;; ptr++) Line 5118  for (;; ptr++)
5118                goto FAILED;                goto FAILED;
5119                }                }
5120              *code = verbs[i].op;              *code = verbs[i].op;
5121              if (*code++ == OP_THEN)              if (*code++ == OP_THEN) cd->external_flags |= PCRE_HASTHEN;
               {  
               PUT(code, 0, code - bcptr->current_branch - 1);  
               code += LINK_SIZE;  
               }  
5122              }              }
5123    
5124            else            else
# Line 5075  for (;; ptr++) Line 5129  for (;; ptr++)
5129                goto FAILED;                goto FAILED;
5130                }                }
5131              *code = verbs[i].op_arg;              *code = verbs[i].op_arg;
5132              if (*code++ == OP_THEN_ARG)              if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;
               {  
               PUT(code, 0, code - bcptr->current_branch - 1);  
               code += LINK_SIZE;  
               }  
5133              *code++ = arglen;              *code++ = arglen;
5134              memcpy(code, arg, arglen);              memcpy(code, arg, arglen);
5135              code += arglen;              code += arglen;
# Line 5914  for (;; ptr++) Line 5964  for (;; ptr++)
5964      *code = bravalue;      *code = bravalue;
5965      tempcode = code;      tempcode = code;
5966      tempreqvary = cd->req_varyopt;        /* Save value before bracket */      tempreqvary = cd->req_varyopt;        /* Save value before bracket */
5967        tempbracount = cd->bracount;          /* Save value before bracket */
5968      length_prevgroup = 0;                 /* Initialize for pre-compile phase */      length_prevgroup = 0;                 /* Initialize for pre-compile phase */
5969    
5970      if (!compile_regex(      if (!compile_regex(
# Line 5936  for (;; ptr++) Line 5987  for (;; ptr++)
5987           ))           ))
5988        goto FAILED;        goto FAILED;
5989    
5990        /* If this was an atomic group and there are no capturing groups within it,
5991        generate OP_ONCE_NC instead of OP_ONCE. */
5992    
5993        if (bravalue == OP_ONCE && cd->bracount <= tempbracount)
5994          *code = OP_ONCE_NC;
5995    
5996      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
5997        cd->assert_depth -= 1;        cd->assert_depth -= 1;
5998    
5999      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
6000      group, while tempcode has been updated to point past the end of the group      group, while tempcode has been updated to point past the end of the group.
6001      and any option resetting that may follow it. The pattern pointer (ptr)      The pattern pointer (ptr) is on the bracket.
     is on the bracket. */  
6002    
6003      /* If this is a conditional bracket, check that there are no more than      If this is a conditional bracket, check that there are no more than
6004      two branches in the group, or just one if it's a DEFINE group. We do this      two branches in the group, or just one if it's a DEFINE group. We do this
6005      in the real compile phase, not in the pre-pass, where the whole group may      in the real compile phase, not in the pre-pass, where the whole group may
6006      not be available. */      not be available. */
# Line 6326  for (;; ptr++) Line 6382  for (;; ptr++)
6382      byte, set it from this character, but revert to none on a zero repeat.      byte, set it from this character, but revert to none on a zero repeat.
6383      Otherwise, leave the firstbyte value alone, and don't change it on a zero      Otherwise, leave the firstbyte value alone, and don't change it on a zero
6384      repeat. */      repeat. */
6385    
6386      if (firstbyte == REQ_UNSET)      if (firstbyte == REQ_UNSET)
6387        {        {
6388        zerofirstbyte = REQ_NONE;        zerofirstbyte = REQ_NONE;
# Line 6735  do { Line 6791  do {
6791    
6792     /* Other brackets */     /* Other brackets */
6793    
6794     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC ||
6795                op == OP_COND)
6796       {       {
6797       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;
6798       }       }
# Line 6839  do { Line 6896  do {
6896    
6897     /* Other brackets */     /* Other brackets */
6898    
6899     else if (op == OP_ASSERT || op == OP_ONCE)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC)
6900       {       {
6901       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6902       }       }
# Line 6909  do { Line 6966  do {
6966       case OP_SCBRAPOS:       case OP_SCBRAPOS:
6967       case OP_ASSERT:       case OP_ASSERT:
6968       case OP_ONCE:       case OP_ONCE:
6969         case OP_ONCE_NC:
6970       case OP_COND:       case OP_COND:
6971       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)
6972         return -1;         return -1;

Legend:
Removed from v.701  
changed lines
  Added in v.744

  ViewVC Help
Powered by ViewVC 1.1.5