/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 716 by ph10, Tue Oct 4 16:38:05 2011 UTC | revision 745 by ph10, Mon Nov 14 11:41:03 2011 UTC
# Line 676  else Line 676  else
676    
677      case CHAR_l:      case CHAR_l:
678      case CHAR_L:      case CHAR_L:
679        *errorcodeptr = ERR37;
680        break;
681    
682      case CHAR_u:      case CHAR_u:
683        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
684          {
685          /* In JavaScript, \u must be followed by four hexadecimal digits.
686          Otherwise it is a lowercase u letter. */
687          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
688               && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
689            {
690            c = 0;
691            for (i = 0; i < 4; ++i)
692              {
693              register int cc = *(++ptr);
694    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
695              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
696              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
697    #else           /* EBCDIC coding */
698              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
699              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
700    #endif
701              }
702            }
703          }
704        else
705          *errorcodeptr = ERR37;
706        break;
707    
708      case CHAR_U:      case CHAR_U:
709      *errorcodeptr = ERR37;      /* In JavaScript, \U is an uppercase U letter. */
710        if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
711      break;      break;
712    
713      /* In a character class, \g is just a literal "g". Outside a character      /* In a character class, \g is just a literal "g". Outside a character
# Line 828  else Line 857  else
857      treated as a data character. */      treated as a data character. */
858    
859      case CHAR_x:      case CHAR_x:
860        if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
861          {
862          /* In JavaScript, \x must be followed by two hexadecimal digits.
863          Otherwise it is a lowercase x letter. */
864          if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
865            {
866            c = 0;
867            for (i = 0; i < 2; ++i)
868              {
869              register int cc = *(++ptr);
870    #ifndef EBCDIC  /* ASCII/UTF-8 coding */
871              if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
872              c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
873    #else           /* EBCDIC coding */
874              if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
875              c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
876    #endif
877              }
878            }
879          break;
880          }
881    
882      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
883        {        {
884        const uschar *pt = ptr + 2;        const uschar *pt = ptr + 2;
# Line 1506  for (;;) Line 1557  for (;;)
1557      case OP_CBRA:      case OP_CBRA:
1558      case OP_BRA:      case OP_BRA:
1559      case OP_ONCE:      case OP_ONCE:
1560        case OP_ONCE_NC:
1561      case OP_COND:      case OP_COND:
1562      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);
1563      if (d < 0) return d;      if (d < 0) return d;
# Line 2045  for (code = first_significant_code(code Line 2097  for (code = first_significant_code(code
2097    
2098    if (c == OP_BRA  || c == OP_BRAPOS ||    if (c == OP_BRA  || c == OP_BRAPOS ||
2099        c == OP_CBRA || c == OP_CBRAPOS ||        c == OP_CBRA || c == OP_CBRAPOS ||
2100        c == OP_ONCE || c == OP_COND)        c == OP_ONCE || c == OP_ONCE_NC ||
2101          c == OP_COND)
2102      {      {
2103      BOOL empty_branch;      BOOL empty_branch;
2104      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
# Line 3142  for (;; ptr++) Line 3195  for (;; ptr++)
3195    int subfirstbyte;    int subfirstbyte;
3196    int terminator;    int terminator;
3197    int mclength;    int mclength;
3198      int tempbracount;
3199    uschar mcbuffer[8];    uschar mcbuffer[8];
3200    
3201    /* Get next byte in the pattern */    /* Get next byte in the pattern */
# Line 4840  for (;; ptr++) Line 4894  for (;; ptr++)
4894          uschar *ketcode = code - 1 - LINK_SIZE;          uschar *ketcode = code - 1 - LINK_SIZE;
4895          uschar *bracode = ketcode - GET(ketcode, 1);          uschar *bracode = ketcode - GET(ketcode, 1);
4896    
4897          if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;          if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
4898          if (*bracode == OP_ONCE)              possessive_quantifier) *bracode = OP_BRA;
4899    
4900            if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
4901            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
4902          else          else
4903            {            {
# Line 5906  for (;; ptr++) Line 5962  for (;; ptr++)
5962      *code = bravalue;      *code = bravalue;
5963      tempcode = code;      tempcode = code;
5964      tempreqvary = cd->req_varyopt;        /* Save value before bracket */      tempreqvary = cd->req_varyopt;        /* Save value before bracket */
5965        tempbracount = cd->bracount;          /* Save value before bracket */
5966      length_prevgroup = 0;                 /* Initialize for pre-compile phase */      length_prevgroup = 0;                 /* Initialize for pre-compile phase */
5967    
5968      if (!compile_regex(      if (!compile_regex(
# Line 5928  for (;; ptr++) Line 5985  for (;; ptr++)
5985           ))           ))
5986        goto FAILED;        goto FAILED;
5987    
5988        /* If this was an atomic group and there are no capturing groups within it,
5989        generate OP_ONCE_NC instead of OP_ONCE. */
5990    
5991        if (bravalue == OP_ONCE && cd->bracount <= tempbracount)
5992          *code = OP_ONCE_NC;
5993    
5994      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)      if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
5995        cd->assert_depth -= 1;        cd->assert_depth -= 1;
5996    
5997      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
5998      group, while tempcode has been updated to point past the end of the group      group, while tempcode has been updated to point past the end of the group.
5999      and any option resetting that may follow it. The pattern pointer (ptr)      The pattern pointer (ptr) is on the bracket.
     is on the bracket. */  
6000    
6001      /* If this is a conditional bracket, check that there are no more than      If this is a conditional bracket, check that there are no more than
6002      two branches in the group, or just one if it's a DEFINE group. We do this      two branches in the group, or just one if it's a DEFINE group. We do this
6003      in the real compile phase, not in the pre-pass, where the whole group may      in the real compile phase, not in the pre-pass, where the whole group may
6004      not be available. */      not be available. */
# Line 6727  do { Line 6789  do {
6789    
6790     /* Other brackets */     /* Other brackets */
6791    
6792     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC ||
6793                op == OP_COND)
6794       {       {
6795       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;
6796       }       }
# Line 6831  do { Line 6894  do {
6894    
6895     /* Other brackets */     /* Other brackets */
6896    
6897     else if (op == OP_ASSERT || op == OP_ONCE)     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC)
6898       {       {
6899       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6900       }       }
# Line 6901  do { Line 6964  do {
6964       case OP_SCBRAPOS:       case OP_SCBRAPOS:
6965       case OP_ASSERT:       case OP_ASSERT:
6966       case OP_ONCE:       case OP_ONCE:
6967         case OP_ONCE_NC:
6968       case OP_COND:       case OP_COND:
6969       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)       if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)
6970         return -1;         return -1;

Legend:
Removed from v.716  
changed lines
  Added in v.745

  ViewVC Help
Powered by ViewVC 1.1.5