/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 342 by ph10, Sun Apr 20 17:10:13 2008 UTC | revision 354 by ph10, Mon Jul 7 16:30:33 2008 UTC
# Line 158  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 1430  for (;;) Line 1456  for (;;)
1456    
1457      case OP_ANY:      case OP_ANY:
1458      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1459      /* Fall through */      /* Fall through */
1460    
1461      case OP_ALLANY:      case OP_ALLANY:
1462      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1463      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
# Line 1653  for (;;) Line 1679  for (;;)
1679      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1680      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1681        {        {
1682        int chartype, script;        const ucd_record * prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1683    
1684        switch(ecode[1])        switch(ecode[1])
1685          {          {
# Line 1663  for (;;) Line 1688  for (;;)
1688          break;          break;
1689    
1690          case PT_LAMP:          case PT_LAMP:
1691          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1692               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1693               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1694            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1695           break;           break;
1696    
1697          case PT_GC:          case PT_GC:
1698          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1699            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1700          break;          break;
1701    
1702          case PT_PC:          case PT_PC:
1703          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1704            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1705          break;          break;
1706    
1707          case PT_SC:          case PT_SC:
1708          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1709            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1710          break;          break;
1711    
# Line 1699  for (;;) Line 1724  for (;;)
1724      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1725      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1726        {        {
1727        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1728        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1729        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1730          {          {
# Line 1709  for (;;) Line 1733  for (;;)
1733            {            {
1734            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1735            }            }
1736          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1737          if (category != ucp_M) break;          if (category != ucp_M) break;
1738          eptr += len;          eptr += len;
1739          }          }
# Line 1730  for (;;) Line 1754  for (;;)
1754      case OP_REF:      case OP_REF:
1755        {        {
1756        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1757        ecode += 3;        ecode += 3;
1758    
1759        /* If the reference is unset, there are two possibilities:        /* If the reference is unset, there are two possibilities:
1760    
1761        (a) In the default, Perl-compatible state, set the length to be longer        (a) In the default, Perl-compatible state, set the length to be longer
1762        than the amount of subject left; this ensures that every attempt at a        than the amount of subject left; this ensures that every attempt at a
1763        match fails. We can't just fail here, because of the possibility of        match fails. We can't just fail here, because of the possibility of
1764        quantifiers with zero minima.        quantifiers with zero minima.
1765    
1766        (b) If the JavaScript compatibility flag is set, set the length to zero        (b) If the JavaScript compatibility flag is set, set the length to zero
1767        so that the back reference matches an empty string.        so that the back reference matches an empty string.
1768    
1769        Otherwise, set the length to the length of what was matched by the        Otherwise, set the length to the length of what was matched by the
1770        referenced subpattern. */        referenced subpattern. */
1771    
1772        if (offset >= offset_top || md->offset_vector[offset] < 0)        if (offset >= offset_top || md->offset_vector[offset] < 0)
1773          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1774        else        else
1775          length = md->offset_vector[offset+1] - md->offset_vector[offset];          length = md->offset_vector[offset+1] - md->offset_vector[offset];
1776    
# Line 2174  for (;;) Line 2198  for (;;)
2198          if (fc != dc)          if (fc != dc)
2199            {            {
2200  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2201            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2202  #endif  #endif
2203              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2204            }            }
# Line 2265  for (;;) Line 2289  for (;;)
2289  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2290          unsigned int othercase;          unsigned int othercase;
2291          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2292              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2293            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2294          else oclength = 0;          else oclength = 0;
2295  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2870  for (;;) Line 2894  for (;;)
2894              {              {
2895              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2896              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2897              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2898              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2899                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
2900                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2883  for (;;) Line 2907  for (;;)
2907              {              {
2908              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2909              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2910              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2911              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2912                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2913              }              }
# Line 2894  for (;;) Line 2918  for (;;)
2918              {              {
2919              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2920              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2921              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2922              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2923                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2924              }              }
# Line 2905  for (;;) Line 2929  for (;;)
2929              {              {
2930              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2931              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2932              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
2933              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2934                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2935              }              }
# Line 2924  for (;;) Line 2948  for (;;)
2948          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2949            {            {
2950            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2951            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
2952            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2953            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2954              {              {
# Line 2933  for (;;) Line 2957  for (;;)
2957                {                {
2958                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2959                }                }
2960              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2961              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2962              eptr += len;              eptr += len;
2963              }              }
# Line 3349  for (;;) Line 3373  for (;;)
3373              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3374              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3375              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3376              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3377              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3378                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3379                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3364  for (;;) Line 3388  for (;;)
3388              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3390              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3391              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3392              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3393                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3394              }              }
# Line 3377  for (;;) Line 3401  for (;;)
3401              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3402              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3403              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3404              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3405              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3406                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3407              }              }
# Line 3390  for (;;) Line 3414  for (;;)
3414              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3415              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3416              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3417              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3418              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3419                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3420              }              }
# Line 3412  for (;;) Line 3436  for (;;)
3436            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3437            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3438            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3439            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3440            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3441            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3442              {              {
# Line 3421  for (;;) Line 3445  for (;;)
3445                {                {
3446                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3447                }                }
3448              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3449              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3450              eptr += len;              eptr += len;
3451              }              }
# Line 3447  for (;;) Line 3471  for (;;)
3471            switch(ctype)            switch(ctype)
3472              {              {
3473              case OP_ANY:        /* This is the non-NL case */              case OP_ANY:        /* This is the non-NL case */
3474              case OP_ALLANY:              case OP_ALLANY:
3475              case OP_ANYBYTE:              case OP_ANYBYTE:
3476              break;              break;
3477    
# Line 3606  for (;;) Line 3630  for (;;)
3630            switch(ctype)            switch(ctype)
3631              {              {
3632              case OP_ANY:     /* This is the non-NL case */              case OP_ANY:     /* This is the non-NL case */
3633              case OP_ALLANY:              case OP_ALLANY:
3634              case OP_ANYBYTE:              case OP_ANYBYTE:
3635              break;              break;
3636    
# Line 3739  for (;;) Line 3763  for (;;)
3763              int len = 1;              int len = 1;
3764              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3765              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3766              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3767              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3768                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3769                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3754  for (;;) Line 3778  for (;;)
3778              int len = 1;              int len = 1;
3779              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3780              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3781              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3782              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3783                break;                break;
3784              eptr+= len;              eptr+= len;
# Line 3767  for (;;) Line 3791  for (;;)
3791              int len = 1;              int len = 1;
3792              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3793              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3794              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3795              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3796                break;                break;
3797              eptr+= len;              eptr+= len;
# Line 3780  for (;;) Line 3804  for (;;)
3804              int len = 1;              int len = 1;
3805              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3806              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3807              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3808              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3809                break;                break;
3810              eptr+= len;              eptr+= len;
# Line 3809  for (;;) Line 3833  for (;;)
3833            {            {
3834            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3835            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3836            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3837            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3838            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3839              {              {
# Line 3818  for (;;) Line 3842  for (;;)
3842                {                {
3843                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3844                }                }
3845              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3846              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3847              eptr += len;              eptr += len;
3848              }              }
# Line 3840  for (;;) Line 3864  for (;;)
3864                BACKCHAR(eptr);                BACKCHAR(eptr);
3865                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3866                }                }
3867              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3868              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3869              eptr--;              eptr--;
3870              }              }

Legend:
Removed from v.342  
changed lines
  Added in v.354

  ViewVC Help
Powered by ViewVC 1.1.5