/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 298 by ph10, Thu Jan 10 17:09:12 2008 UTC | revision 354 by ph10, Mon Jul 7 16:30:33 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 158  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 1148  for (;;) Line 1174  for (;;)
1174      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1175      break;      break;
1176    
1177      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1178      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1179      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1180      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1181      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1182    
1183      case OP_BRAZERO:      case OP_BRAZERO:
1184        {        {
# Line 1174  for (;;) Line 1200  for (;;)
1200        }        }
1201      break;      break;
1202    
1203        case OP_SKIPZERO:
1204          {
1205          next = ecode+1;
1206          do next += GET(next,1); while (*next == OP_ALT);
1207          ecode = next + 1 + LINK_SIZE;
1208          }
1209        break;
1210    
1211      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1212    
1213      case OP_KET:      case OP_KET:
# Line 1421  for (;;) Line 1455  for (;;)
1455      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1456    
1457      case OP_ANY:      case OP_ANY:
1458      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1459        {      /* Fall through */
1460        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1461        }      case OP_ALLANY:
1462      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1463      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1464      ecode++;      ecode++;
1465      break;      break;
1466    
# Line 1646  for (;;) Line 1679  for (;;)
1679      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1680      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1681        {        {
1682        int chartype, script;        const ucd_record * prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1683    
1684        switch(ecode[1])        switch(ecode[1])
1685          {          {
# Line 1656  for (;;) Line 1688  for (;;)
1688          break;          break;
1689    
1690          case PT_LAMP:          case PT_LAMP:
1691          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1692               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1693               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1694            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1695           break;           break;
1696    
1697          case PT_GC:          case PT_GC:
1698          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1699            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1700          break;          break;
1701    
1702          case PT_PC:          case PT_PC:
1703          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1704            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1705          break;          break;
1706    
1707          case PT_SC:          case PT_SC:
1708          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1709            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1710          break;          break;
1711    
# Line 1692  for (;;) Line 1724  for (;;)
1724      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1725      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1726        {        {
1727        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1728        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1729        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1730          {          {
# Line 1702  for (;;) Line 1733  for (;;)
1733            {            {
1734            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1735            }            }
1736          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1737          if (category != ucp_M) break;          if (category != ucp_M) break;
1738          eptr += len;          eptr += len;
1739          }          }
# Line 1723  for (;;) Line 1754  for (;;)
1754      case OP_REF:      case OP_REF:
1755        {        {
1756        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1757        ecode += 3;                                 /* Advance past item */        ecode += 3;
1758    
1759          /* If the reference is unset, there are two possibilities:
1760    
1761          (a) In the default, Perl-compatible state, set the length to be longer
1762          than the amount of subject left; this ensures that every attempt at a
1763          match fails. We can't just fail here, because of the possibility of
1764          quantifiers with zero minima.
1765    
1766        /* If the reference is unset, set the length to be longer than the amount        (b) If the JavaScript compatibility flag is set, set the length to zero
1767        of subject left; this ensures that every attempt at a match fails. We        so that the back reference matches an empty string.
1768        can't just fail here, because of the possibility of quantifiers with zero  
1769        minima. */        Otherwise, set the length to the length of what was matched by the
1770          referenced subpattern. */
1771        length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
1772          md->end_subject - eptr + 1 :        if (offset >= offset_top || md->offset_vector[offset] < 0)
1773          md->offset_vector[offset+1] - md->offset_vector[offset];          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1774          else
1775            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1776    
1777        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1778    
# Line 2158  for (;;) Line 2198  for (;;)
2198          if (fc != dc)          if (fc != dc)
2199            {            {
2200  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2201            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2202  #endif  #endif
2203              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2204            }            }
# Line 2249  for (;;) Line 2289  for (;;)
2289  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2290          unsigned int othercase;          unsigned int othercase;
2291          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2292              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2293            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2294          else oclength = 0;          else oclength = 0;
2295  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2854  for (;;) Line 2894  for (;;)
2894              {              {
2895              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2896              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2897              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2898              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2899                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
2900                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2867  for (;;) Line 2907  for (;;)
2907              {              {
2908              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2909              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2910              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2911              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2912                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2913              }              }
# Line 2878  for (;;) Line 2918  for (;;)
2918              {              {
2919              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2920              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2921              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2922              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2923                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2924              }              }
# Line 2889  for (;;) Line 2929  for (;;)
2929              {              {
2930              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2931              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2932              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
2933              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2934                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2935              }              }
# Line 2908  for (;;) Line 2948  for (;;)
2948          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2949            {            {
2950            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2951            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
2952            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2953            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2954              {              {
# Line 2917  for (;;) Line 2957  for (;;)
2957                {                {
2958                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2959                }                }
2960              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2961              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2962              eptr += len;              eptr += len;
2963              }              }
# Line 2935  for (;;) Line 2975  for (;;)
2975          case OP_ANY:          case OP_ANY:
2976          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2977            {            {
2978            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))  
2979              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2980            eptr++;            eptr++;
2981            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2982            }            }
2983          break;          break;
2984    
2985            case OP_ALLANY:
2986            for (i = 1; i <= min; i++)
2987              {
2988              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2989              eptr++;
2990              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2991              }
2992            break;
2993    
2994          case OP_ANYBYTE:          case OP_ANYBYTE:
2995          eptr += min;          eptr += min;
2996          break;          break;
# Line 3151  for (;;) Line 3199  for (;;)
3199        switch(ctype)        switch(ctype)
3200          {          {
3201          case OP_ANY:          case OP_ANY:
3202          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3203            {            {
3204            for (i = 1; i <= min; i++)            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3205              {            eptr++;
             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
             eptr++;  
             }  
3206            }            }
3207          else eptr += min;          break;
3208    
3209            case OP_ALLANY:
3210            eptr += min;
3211          break;          break;
3212    
3213          case OP_ANYBYTE:          case OP_ANYBYTE:
# Line 3325  for (;;) Line 3373  for (;;)
3373              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3374              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3375              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3376              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3377              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3378                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3379                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3340  for (;;) Line 3388  for (;;)
3388              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3390              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3391              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3392              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3393                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3394              }              }
# Line 3353  for (;;) Line 3401  for (;;)
3401              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3402              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3403              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3404              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3405              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3406                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3407              }              }
# Line 3366  for (;;) Line 3414  for (;;)
3414              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3415              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3416              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3417              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3418              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3419                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3420              }              }
# Line 3388  for (;;) Line 3436  for (;;)
3436            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3437            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3438            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3439            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3440            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3441            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3442              {              {
# Line 3397  for (;;) Line 3445  for (;;)
3445                {                {
3446                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3447                }                }
3448              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3449              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3450              eptr += len;              eptr += len;
3451              }              }
# Line 3416  for (;;) Line 3464  for (;;)
3464            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3465            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3466            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3467                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
                 IS_NEWLINE(eptr)))  
3468              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3469    
3470            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3471            switch(ctype)            switch(ctype)
3472              {              {
3473              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3474              break;              case OP_ALLANY:
   
3475              case OP_ANYBYTE:              case OP_ANYBYTE:
3476              break;              break;
3477    
# Line 3577  for (;;) Line 3623  for (;;)
3623            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3624            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3625            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3626                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3627              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3628    
3629            c = *eptr++;            c = *eptr++;
3630            switch(ctype)            switch(ctype)
3631              {              {
3632              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
3633              break;              case OP_ALLANY:
   
3634              case OP_ANYBYTE:              case OP_ANYBYTE:
3635              break;              break;
3636    
# Line 3718  for (;;) Line 3763  for (;;)
3763              int len = 1;              int len = 1;
3764              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3765              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3766              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3767              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3768                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3769                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3733  for (;;) Line 3778  for (;;)
3778              int len = 1;              int len = 1;
3779              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3780              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3781              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3782              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3783                break;                break;
3784              eptr+= len;              eptr+= len;
# Line 3746  for (;;) Line 3791  for (;;)
3791              int len = 1;              int len = 1;
3792              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3793              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3794              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3795              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3796                break;                break;
3797              eptr+= len;              eptr+= len;
# Line 3759  for (;;) Line 3804  for (;;)
3804              int len = 1;              int len = 1;
3805              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3806              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3807              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3808              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3809                break;                break;
3810              eptr+= len;              eptr+= len;
# Line 3788  for (;;) Line 3833  for (;;)
3833            {            {
3834            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3835            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3836            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3837            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3838            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3839              {              {
# Line 3797  for (;;) Line 3842  for (;;)
3842                {                {
3843                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3844                }                }
3845              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3846              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3847              eptr += len;              eptr += len;
3848              }              }
# Line 3819  for (;;) Line 3864  for (;;)
3864                BACKCHAR(eptr);                BACKCHAR(eptr);
3865                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3866                }                }
3867              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3868              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3869              eptr--;              eptr--;
3870              }              }
# Line 3839  for (;;) Line 3884  for (;;)
3884            case OP_ANY:            case OP_ANY:
3885            if (max < INT_MAX)            if (max < INT_MAX)
3886              {              {
3887              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
3888                {                {
3889                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3890                  {                eptr++;
3891                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3892                }                }
3893              }              }
3894    
# Line 3863  for (;;) Line 3896  for (;;)
3896    
3897            else            else
3898              {              {
3899              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
3900                {                {
3901                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3902                  {                eptr++;
3903                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3904                }                }
3905              else              }
3906              break;
3907    
3908              case OP_ALLANY:
3909              if (max < INT_MAX)
3910                {
3911                for (i = min; i < max; i++)
3912                {                {
3913                eptr = md->end_subject;                if (eptr >= md->end_subject) break;
3914                  eptr++;
3915                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3916                }                }
3917              }              }
3918              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3919            break;            break;
3920    
3921            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 4064  for (;;) Line 4103  for (;;)
4103          switch(ctype)          switch(ctype)
4104            {            {
4105            case OP_ANY:            case OP_ANY:
4106            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4107              {              {
4108              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4109                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4110              }              }
4111            /* For DOTALL case, fall through and treat as \C */            break;
4112    
4113              case OP_ALLANY:
4114            case OP_ANYBYTE:            case OP_ANYBYTE:
4115            c = max - min;            c = max - min;
4116            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4450  end_subject = md->end_subject; Line 4486  end_subject = md->end_subject;
4486    
4487  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4488  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4489    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4490    
4491  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4492  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;

Legend:
Removed from v.298  
changed lines
  Added in v.354

  ViewVC Help
Powered by ViewVC 1.1.5