/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 305 by ph10, Sun Jan 20 20:07:32 2008 UTC | revision 384 by ph10, Sun Mar 8 16:27:43 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 158  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 761  for (;;) Line 787  for (;;)
787    
788      case OP_COND:      case OP_COND:
789      case OP_SCOND:      case OP_SCOND:
790        /* Because of the way auto-callout works during compile, a callout item is
791        inserted between OP_COND and an assertion condition. */
792    
793        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
794          {
795          if (pcre_callout != NULL)
796            {
797            pcre_callout_block cb;
798            cb.version          = 1;   /* Version 1 of the callout block */
799            cb.callout_number   = ecode[LINK_SIZE+2];
800            cb.offset_vector    = md->offset_vector;
801            cb.subject          = (PCRE_SPTR)md->start_subject;
802            cb.subject_length   = md->end_subject - md->start_subject;
803            cb.start_match      = mstart - md->start_subject;
804            cb.current_position = eptr - md->start_subject;
805            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
806            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
807            cb.capture_top      = offset_top/2;
808            cb.capture_last     = md->capture_last;
809            cb.callout_data     = md->callout_data;
810            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
811            if (rrc < 0) RRETURN(rrc);
812            }
813          ecode += _pcre_OP_lengths[OP_CALLOUT];
814          }
815    
816        /* Now see what the actual condition is */
817    
818      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
819        {        {
820        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
# Line 1148  for (;;) Line 1202  for (;;)
1202      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1203      break;      break;
1204    
1205      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1206      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1207      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1208      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1209      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1210    
1211      case OP_BRAZERO:      case OP_BRAZERO:
1212        {        {
# Line 1174  for (;;) Line 1228  for (;;)
1228        }        }
1229      break;      break;
1230    
1231        case OP_SKIPZERO:
1232          {
1233          next = ecode+1;
1234          do next += GET(next,1); while (*next == OP_ALT);
1235          ecode = next + 1 + LINK_SIZE;
1236          }
1237        break;
1238    
1239      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1240    
1241      case OP_KET:      case OP_KET:
# Line 1421  for (;;) Line 1483  for (;;)
1483      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1484    
1485      case OP_ANY:      case OP_ANY:
1486      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1487        {      /* Fall through */
1488        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1489        }      case OP_ALLANY:
1490      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1491      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1492      ecode++;      ecode++;
1493      break;      break;
1494    
# Line 1646  for (;;) Line 1707  for (;;)
1707      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1708      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1709        {        {
1710        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1711    
1712        switch(ecode[1])        switch(ecode[1])
1713          {          {
# Line 1656  for (;;) Line 1716  for (;;)
1716          break;          break;
1717    
1718          case PT_LAMP:          case PT_LAMP:
1719          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1720               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1721               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1722            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1723           break;           break;
1724    
1725          case PT_GC:          case PT_GC:
1726          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1727            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1728          break;          break;
1729    
1730          case PT_PC:          case PT_PC:
1731          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1732            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1733          break;          break;
1734    
1735          case PT_SC:          case PT_SC:
1736          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1737            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1738          break;          break;
1739    
# Line 1692  for (;;) Line 1752  for (;;)
1752      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1753      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1754        {        {
1755        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1756        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1757        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1758          {          {
# Line 1702  for (;;) Line 1761  for (;;)
1761            {            {
1762            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1763            }            }
1764          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1765          if (category != ucp_M) break;          if (category != ucp_M) break;
1766          eptr += len;          eptr += len;
1767          }          }
# Line 1723  for (;;) Line 1782  for (;;)
1782      case OP_REF:      case OP_REF:
1783        {        {
1784        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1785        ecode += 3;                                 /* Advance past item */        ecode += 3;
1786    
1787          /* If the reference is unset, there are two possibilities:
1788    
1789        /* If the reference is unset, set the length to be longer than the amount        (a) In the default, Perl-compatible state, set the length to be longer
1790        of subject left; this ensures that every attempt at a match fails. We        than the amount of subject left; this ensures that every attempt at a
1791        can't just fail here, because of the possibility of quantifiers with zero        match fails. We can't just fail here, because of the possibility of
1792        minima. */        quantifiers with zero minima.
1793    
1794        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        (b) If the JavaScript compatibility flag is set, set the length to zero
1795          md->end_subject - eptr + 1 :        so that the back reference matches an empty string.
1796          md->offset_vector[offset+1] - md->offset_vector[offset];  
1797          Otherwise, set the length to the length of what was matched by the
1798          referenced subpattern. */
1799    
1800          if (offset >= offset_top || md->offset_vector[offset] < 0)
1801            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1802          else
1803            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1804    
1805        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1806    
# Line 2007  for (;;) Line 2075  for (;;)
2075    
2076    
2077      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2078      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2079        mode, because Unicode properties are supported in non-UTF-8 mode. */
2080    
2081  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2082      case OP_XCLASS:      case OP_XCLASS:
# Line 2049  for (;;) Line 2118  for (;;)
2118        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2119          {          {
2120          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2121          GETCHARINC(c, eptr);          GETCHARINCTEST(c, eptr);
2122          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2123          }          }
2124    
# Line 2068  for (;;) Line 2137  for (;;)
2137            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2138            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2139            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2140            GETCHARINC(c, eptr);            GETCHARINCTEST(c, eptr);
2141            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2142            }            }
2143          /* Control never gets here */          /* Control never gets here */
# Line 2083  for (;;) Line 2152  for (;;)
2152            {            {
2153            int len = 1;            int len = 1;
2154            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2155            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2156            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2157            eptr += len;            eptr += len;
2158            }            }
# Line 2158  for (;;) Line 2227  for (;;)
2227          if (fc != dc)          if (fc != dc)
2228            {            {
2229  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2230            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2231  #endif  #endif
2232              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2233            }            }
# Line 2249  for (;;) Line 2318  for (;;)
2318  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2319          unsigned int othercase;          unsigned int othercase;
2320          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2321              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2322            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2323          else oclength = 0;          else oclength = 0;
2324  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2569  for (;;) Line 2638  for (;;)
2638              {              {
2639              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2640              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2641                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2642              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2643              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2644              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
2645                RRETURN(MATCH_NOMATCH);  
2646              }              }
2647            }            }
2648          else          else
# Line 2678  for (;;) Line 2748  for (;;)
2748              {              {
2749              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2750              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2751                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2752              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2753              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2754              }              }
2755            }            }
2756          else          else
# Line 2854  for (;;) Line 2924  for (;;)
2924              {              {
2925              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2926              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2927              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2928              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2929                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
2930                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2867  for (;;) Line 2937  for (;;)
2937              {              {
2938              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2939              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2940              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2941              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2942                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2943              }              }
# Line 2878  for (;;) Line 2948  for (;;)
2948              {              {
2949              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2950              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2951              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2952              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2953                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2954              }              }
# Line 2889  for (;;) Line 2959  for (;;)
2959              {              {
2960              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2961              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2962              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
2963              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2964                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2965              }              }
# Line 2908  for (;;) Line 2978  for (;;)
2978          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2979            {            {
2980            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2981            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
2982            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2983            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2984              {              {
# Line 2917  for (;;) Line 2987  for (;;)
2987                {                {
2988                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2989                }                }
2990              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2991              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2992              eptr += len;              eptr += len;
2993              }              }
# Line 2935  for (;;) Line 3005  for (;;)
3005          case OP_ANY:          case OP_ANY:
3006          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3007            {            {
3008            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))  
3009              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3010            eptr++;            eptr++;
3011            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3012            }            }
3013          break;          break;
3014    
3015            case OP_ALLANY:
3016            for (i = 1; i <= min; i++)
3017              {
3018              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3019              eptr++;
3020              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3021              }
3022            break;
3023    
3024          case OP_ANYBYTE:          case OP_ANYBYTE:
3025          eptr += min;          eptr += min;
3026          break;          break;
# Line 3151  for (;;) Line 3229  for (;;)
3229        switch(ctype)        switch(ctype)
3230          {          {
3231          case OP_ANY:          case OP_ANY:
3232          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3233            {            {
3234            for (i = 1; i <= min; i++)            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3235              {            eptr++;
             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
             eptr++;  
             }  
3236            }            }
3237          else eptr += min;          break;
3238    
3239            case OP_ALLANY:
3240            eptr += min;
3241          break;          break;
3242    
3243          case OP_ANYBYTE:          case OP_ANYBYTE:
# Line 3325  for (;;) Line 3403  for (;;)
3403              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3404              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3405              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3406              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3407              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3408                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3409                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3340  for (;;) Line 3418  for (;;)
3418              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3419              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3420              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3421              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3422              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3423                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3424              }              }
# Line 3353  for (;;) Line 3431  for (;;)
3431              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3432              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3433              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3434              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3435              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3436                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3437              }              }
# Line 3366  for (;;) Line 3444  for (;;)
3444              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3445              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3446              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3447              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3448              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3449                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3450              }              }
# Line 3388  for (;;) Line 3466  for (;;)
3466            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3467            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3468            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3469            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3470            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3471            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3472              {              {
# Line 3397  for (;;) Line 3475  for (;;)
3475                {                {
3476                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3477                }                }
3478              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3479              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3480              eptr += len;              eptr += len;
3481              }              }
# Line 3416  for (;;) Line 3494  for (;;)
3494            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3495            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3496            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3497                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
                 IS_NEWLINE(eptr)))  
3498              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3499    
3500            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3501            switch(ctype)            switch(ctype)
3502              {              {
3503              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3504              break;              case OP_ALLANY:
   
3505              case OP_ANYBYTE:              case OP_ANYBYTE:
3506              break;              break;
3507    
# Line 3577  for (;;) Line 3653  for (;;)
3653            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3654            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3655            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3656                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3657              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3658    
3659            c = *eptr++;            c = *eptr++;
3660            switch(ctype)            switch(ctype)
3661              {              {
3662              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
3663              break;              case OP_ALLANY:
   
3664              case OP_ANYBYTE:              case OP_ANYBYTE:
3665              break;              break;
3666    
# Line 3718  for (;;) Line 3793  for (;;)
3793              int len = 1;              int len = 1;
3794              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3795              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3796              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3797              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3798                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3799                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3733  for (;;) Line 3808  for (;;)
3808              int len = 1;              int len = 1;
3809              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3810              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3811              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3812              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3813                break;                break;
3814              eptr+= len;              eptr+= len;
# Line 3746  for (;;) Line 3821  for (;;)
3821              int len = 1;              int len = 1;
3822              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3823              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3824              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3825              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3826                break;                break;
3827              eptr+= len;              eptr+= len;
# Line 3759  for (;;) Line 3834  for (;;)
3834              int len = 1;              int len = 1;
3835              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3836              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3837              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3838              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3839                break;                break;
3840              eptr+= len;              eptr+= len;
# Line 3788  for (;;) Line 3863  for (;;)
3863            {            {
3864            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3865            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3866            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3867            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3868            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3869              {              {
# Line 3797  for (;;) Line 3872  for (;;)
3872                {                {
3873                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3874                }                }
3875              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3876              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3877              eptr += len;              eptr += len;
3878              }              }
# Line 3819  for (;;) Line 3894  for (;;)
3894                BACKCHAR(eptr);                BACKCHAR(eptr);
3895                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3896                }                }
3897              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3898              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3899              eptr--;              eptr--;
3900              }              }
# Line 3839  for (;;) Line 3914  for (;;)
3914            case OP_ANY:            case OP_ANY:
3915            if (max < INT_MAX)            if (max < INT_MAX)
3916              {              {
3917              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
3918                {                {
3919                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3920                  {                eptr++;
3921                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3922                }                }
3923              }              }
3924    
# Line 3863  for (;;) Line 3926  for (;;)
3926    
3927            else            else
3928              {              {
3929              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
3930                {                {
3931                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3932                  {                eptr++;
3933                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3934                }                }
3935              else              }
3936              break;
3937    
3938              case OP_ALLANY:
3939              if (max < INT_MAX)
3940                {
3941                for (i = min; i < max; i++)
3942                {                {
3943                eptr = md->end_subject;                if (eptr >= md->end_subject) break;
3944                  eptr++;
3945                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3946                }                }
3947              }              }
3948              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3949            break;            break;
3950    
3951            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 4064  for (;;) Line 4133  for (;;)
4133          switch(ctype)          switch(ctype)
4134            {            {
4135            case OP_ANY:            case OP_ANY:
4136            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4137              {              {
4138              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4139                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4140              }              }
4141            /* For DOTALL case, fall through and treat as \C */            break;
4142    
4143              case OP_ALLANY:
4144            case OP_ANYBYTE:            case OP_ANYBYTE:
4145            c = max - min;            c = max - min;
4146            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4348  Returns:          > 0 => success; value Line 4414  Returns:          > 0 => success; value
4414                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4415  */  */
4416    
4417  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4418  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4419    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4420    int offsetcount)    int offsetcount)
# Line 4450  end_subject = md->end_subject; Line 4516  end_subject = md->end_subject;
4516    
4517  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4518  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4519    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4520    
4521  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4522  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 4659  for(;;) Line 4726  for(;;)
4726    if (firstline)    if (firstline)
4727      {      {
4728      USPTR t = start_match;      USPTR t = start_match;
4729    #ifdef SUPPORT_UTF8
4730        if (utf8)
4731          {
4732          while (t < md->end_subject && !IS_NEWLINE(t))
4733            {
4734            t++;
4735            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
4736            }
4737          }
4738        else
4739    #endif
4740      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4741      end_subject = t;      end_subject = t;
4742      }      }
4743    
4744    /* Now test for a unique first byte */    /* Now advance to a unique first byte if there is one. */
4745    
4746    if (first_byte >= 0)    if (first_byte >= 0)
4747      {      {
4748      if (first_byte_caseless)      if (first_byte_caseless)
4749        while (start_match < end_subject &&        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
4750               md->lcc[*start_match] != first_byte)          start_match++;
         { NEXTCHAR(start_match); }  
4751      else      else
4752        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4753          { NEXTCHAR(start_match); }          start_match++;
4754      }      }
4755    
4756    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match */
4757    
4758    else if (startline)    else if (startline)
4759      {      {
4760      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4761        {        {
4762        while (start_match <= end_subject && !WAS_NEWLINE(start_match))  #ifdef SUPPORT_UTF8
4763          { NEXTCHAR(start_match); }        if (utf8)
4764            {
4765            while (start_match < end_subject && !WAS_NEWLINE(start_match))
4766              {
4767              start_match++;
4768              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4769                start_match++;
4770              }
4771            }
4772          else
4773    #endif
4774          while (start_match < end_subject && !WAS_NEWLINE(start_match))
4775            start_match++;
4776    
4777        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4778        and we are now at a LF, advance the match position by one more character.        and we are now at a LF, advance the match position by one more character.
# Line 4697  for(;;) Line 4786  for(;;)
4786        }        }
4787      }      }
4788    
4789    /* Or to a non-unique first char after study */    /* Or to a non-unique first byte after study */
4790    
4791    else if (start_bits != NULL)    else if (start_bits != NULL)
4792      {      {
4793      while (start_match < end_subject)      while (start_match < end_subject)
4794        {        {
4795        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4796        if ((start_bits[c/8] & (1 << (c&7))) == 0)        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4797          { NEXTCHAR(start_match); }          else break;
       else break;  
4798        }        }
4799      }      }
4800    

Legend:
Removed from v.305  
changed lines
  Added in v.384

  ViewVC Help
Powered by ViewVC 1.1.5