/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 236 by ph10, Tue Sep 11 12:57:06 2007 UTC revision 399 by ph10, Sat Mar 21 12:34:15 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 158  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 535  int oclength; Line 561  int oclength;
561  uschar occhars[8];  uschar occhars[8];
562  #endif  #endif
563    
564    int codelink;
565    int condcode;
566  int ctype;  int ctype;
567  int length;  int length;
568  int max;  int max;
# Line 609  for (;;) Line 637  for (;;)
637    {    {
638    minimize = possessive = FALSE;    minimize = possessive = FALSE;
639    op = *ecode;    op = *ecode;
640    
641    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
642    matching at least one subject character. */    matching at least one subject character. */
643    
# Line 761  for (;;) Line 789  for (;;)
789    
790      case OP_COND:      case OP_COND:
791      case OP_SCOND:      case OP_SCOND:
792      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
793    
794        /* Because of the way auto-callout works during compile, a callout item is
795        inserted between OP_COND and an assertion condition. */
796    
797        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
798          {
799          if (pcre_callout != NULL)
800            {
801            pcre_callout_block cb;
802            cb.version          = 1;   /* Version 1 of the callout block */
803            cb.callout_number   = ecode[LINK_SIZE+2];
804            cb.offset_vector    = md->offset_vector;
805            cb.subject          = (PCRE_SPTR)md->start_subject;
806            cb.subject_length   = md->end_subject - md->start_subject;
807            cb.start_match      = mstart - md->start_subject;
808            cb.current_position = eptr - md->start_subject;
809            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
810            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
811            cb.capture_top      = offset_top/2;
812            cb.capture_last     = md->capture_last;
813            cb.callout_data     = md->callout_data;
814            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
815            if (rrc < 0) RRETURN(rrc);
816            }
817          ecode += _pcre_OP_lengths[OP_CALLOUT];
818          }
819    
820        condcode = ecode[LINK_SIZE+1];
821    
822        /* Now see what the actual condition is */
823    
824        if (condcode == OP_RREF)         /* Recursion test */
825        {        {
826        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
827        condition = md->recursive != NULL &&        condition = md->recursive != NULL &&
# Line 769  for (;;) Line 829  for (;;)
829        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
830        }        }
831    
832      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF)    /* Group used test */
833        {        {
834        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
835        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
836        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
837        }        }
838    
839      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
840        {        {
841        condition = FALSE;        condition = FALSE;
842        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 803  for (;;) Line 863  for (;;)
863        else        else
864          {          {
865          condition = FALSE;          condition = FALSE;
866          ecode += GET(ecode, 1);          ecode += codelink;
867          }          }
868        }        }
869    
# Line 826  for (;;) Line 886  for (;;)
886          goto TAIL_RECURSE;          goto TAIL_RECURSE;
887          }          }
888        }        }
889      else                         /* Condition false & no 2nd alternative */      else                         /* Condition false & no alternative */
890        {        {
891        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
892        }        }
# Line 1148  for (;;) Line 1208  for (;;)
1208      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1209      break;      break;
1210    
1211      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1212      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1213      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1214      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1215      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1216    
1217      case OP_BRAZERO:      case OP_BRAZERO:
1218        {        {
# Line 1174  for (;;) Line 1234  for (;;)
1234        }        }
1235      break;      break;
1236    
1237        case OP_SKIPZERO:
1238          {
1239          next = ecode+1;
1240          do next += GET(next,1); while (*next == OP_ALT);
1241          ecode = next + 1 + LINK_SIZE;
1242          }
1243        break;
1244    
1245      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1246    
1247      case OP_KET:      case OP_KET:
# Line 1421  for (;;) Line 1489  for (;;)
1489      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1490    
1491      case OP_ANY:      case OP_ANY:
1492      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1493        {      /* Fall through */
1494        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1495        }      case OP_ALLANY:
1496      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1497      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1498      ecode++;      ecode++;
1499      break;      break;
1500    
# Line 1646  for (;;) Line 1713  for (;;)
1713      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1714      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1715        {        {
1716        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1717    
1718        switch(ecode[1])        switch(ecode[1])
1719          {          {
# Line 1656  for (;;) Line 1722  for (;;)
1722          break;          break;
1723    
1724          case PT_LAMP:          case PT_LAMP:
1725          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1726               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1727               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1728            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1729           break;           break;
1730    
1731          case PT_GC:          case PT_GC:
1732          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1733            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1734          break;          break;
1735    
1736          case PT_PC:          case PT_PC:
1737          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1738            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1739          break;          break;
1740    
1741          case PT_SC:          case PT_SC:
1742          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1743            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1744          break;          break;
1745    
# Line 1692  for (;;) Line 1758  for (;;)
1758      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1759      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1760        {        {
1761        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1762        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1763        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1764          {          {
# Line 1702  for (;;) Line 1767  for (;;)
1767            {            {
1768            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1769            }            }
1770          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1771          if (category != ucp_M) break;          if (category != ucp_M) break;
1772          eptr += len;          eptr += len;
1773          }          }
# Line 1723  for (;;) Line 1788  for (;;)
1788      case OP_REF:      case OP_REF:
1789        {        {
1790        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1791        ecode += 3;                                 /* Advance past item */        ecode += 3;
1792    
1793          /* If the reference is unset, there are two possibilities:
1794    
1795          (a) In the default, Perl-compatible state, set the length to be longer
1796          than the amount of subject left; this ensures that every attempt at a
1797          match fails. We can't just fail here, because of the possibility of
1798          quantifiers with zero minima.
1799    
1800          (b) If the JavaScript compatibility flag is set, set the length to zero
1801          so that the back reference matches an empty string.
1802    
1803        /* If the reference is unset, set the length to be longer than the amount        Otherwise, set the length to the length of what was matched by the
1804        of subject left; this ensures that every attempt at a match fails. We        referenced subpattern. */
1805        can't just fail here, because of the possibility of quantifiers with zero  
1806        minima. */        if (offset >= offset_top || md->offset_vector[offset] < 0)
1807            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1808        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        else
1809          md->end_subject - eptr + 1 :          length = md->offset_vector[offset+1] - md->offset_vector[offset];
         md->offset_vector[offset+1] - md->offset_vector[offset];  
1810    
1811        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1812    
# Line 2007  for (;;) Line 2081  for (;;)
2081    
2082    
2083      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2084      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2085        mode, because Unicode properties are supported in non-UTF-8 mode. */
2086    
2087  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2088      case OP_XCLASS:      case OP_XCLASS:
# Line 2049  for (;;) Line 2124  for (;;)
2124        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2125          {          {
2126          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2127          GETCHARINC(c, eptr);          GETCHARINCTEST(c, eptr);
2128          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2129          }          }
2130    
# Line 2068  for (;;) Line 2143  for (;;)
2143            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2144            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2145            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2146            GETCHARINC(c, eptr);            GETCHARINCTEST(c, eptr);
2147            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2148            }            }
2149          /* Control never gets here */          /* Control never gets here */
# Line 2083  for (;;) Line 2158  for (;;)
2158            {            {
2159            int len = 1;            int len = 1;
2160            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2161            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2162            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2163            eptr += len;            eptr += len;
2164            }            }
# Line 2158  for (;;) Line 2233  for (;;)
2233          if (fc != dc)          if (fc != dc)
2234            {            {
2235  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2236            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2237  #endif  #endif
2238              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2239            }            }
# Line 2249  for (;;) Line 2324  for (;;)
2324  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2325          unsigned int othercase;          unsigned int othercase;
2326          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2327              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2328            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2329          else oclength = 0;          else oclength = 0;
2330  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2569  for (;;) Line 2644  for (;;)
2644              {              {
2645              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2646              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2647                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2648              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2649              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2650              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
2651                RRETURN(MATCH_NOMATCH);  
2652              }              }
2653            }            }
2654          else          else
# Line 2678  for (;;) Line 2754  for (;;)
2754              {              {
2755              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2756              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2757                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2758              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2759              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2760              }              }
2761            }            }
2762          else          else
# Line 2854  for (;;) Line 2930  for (;;)
2930              {              {
2931              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2932              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2933              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2934              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2935                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
2936                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2867  for (;;) Line 2943  for (;;)
2943              {              {
2944              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2945              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2946              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2947              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2948                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2949              }              }
# Line 2878  for (;;) Line 2954  for (;;)
2954              {              {
2955              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2956              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2957              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2958              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2959                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2960              }              }
# Line 2889  for (;;) Line 2965  for (;;)
2965              {              {
2966              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2967              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2968              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
2969              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2970                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2971              }              }
# Line 2908  for (;;) Line 2984  for (;;)
2984          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2985            {            {
2986            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2987            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
2988            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2989            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2990              {              {
# Line 2917  for (;;) Line 2993  for (;;)
2993                {                {
2994                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2995                }                }
2996              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2997              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2998              eptr += len;              eptr += len;
2999              }              }
# Line 2935  for (;;) Line 3011  for (;;)
3011          case OP_ANY:          case OP_ANY:
3012          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3013            {            {
3014            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))  
3015              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3016            eptr++;            eptr++;
3017            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3018            }            }
3019          break;          break;
3020    
3021            case OP_ALLANY:
3022            for (i = 1; i <= min; i++)
3023              {
3024              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3025              eptr++;
3026              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3027              }
3028            break;
3029    
3030          case OP_ANYBYTE:          case OP_ANYBYTE:
3031          eptr += min;          eptr += min;
3032          break;          break;
# Line 3151  for (;;) Line 3235  for (;;)
3235        switch(ctype)        switch(ctype)
3236          {          {
3237          case OP_ANY:          case OP_ANY:
3238          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3239            {            {
3240            for (i = 1; i <= min; i++)            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3241              {            eptr++;
             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
             eptr++;  
             }  
3242            }            }
3243          else eptr += min;          break;
3244    
3245            case OP_ALLANY:
3246            eptr += min;
3247          break;          break;
3248    
3249          case OP_ANYBYTE:          case OP_ANYBYTE:
# Line 3325  for (;;) Line 3409  for (;;)
3409              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3410              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3411              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3412              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3413              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3414                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3415                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3340  for (;;) Line 3424  for (;;)
3424              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3425              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3426              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3427              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3428              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3429                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3430              }              }
# Line 3353  for (;;) Line 3437  for (;;)
3437              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3438              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3439              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3440              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3441              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3442                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3443              }              }
# Line 3366  for (;;) Line 3450  for (;;)
3450              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3451              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3452              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3453              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3454              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3455                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3456              }              }
# Line 3388  for (;;) Line 3472  for (;;)
3472            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3473            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3474            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3475            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3476            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3477            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3478              {              {
# Line 3397  for (;;) Line 3481  for (;;)
3481                {                {
3482                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3483                }                }
3484              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3485              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3486              eptr += len;              eptr += len;
3487              }              }
# Line 3416  for (;;) Line 3500  for (;;)
3500            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3501            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3502            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3503                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
                 IS_NEWLINE(eptr)))  
3504              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3505    
3506            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3507            switch(ctype)            switch(ctype)
3508              {              {
3509              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3510              break;              case OP_ALLANY:
   
3511              case OP_ANYBYTE:              case OP_ANYBYTE:
3512              break;              break;
3513    
# Line 3577  for (;;) Line 3659  for (;;)
3659            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3660            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3661            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3662                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3663              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3664    
3665            c = *eptr++;            c = *eptr++;
3666            switch(ctype)            switch(ctype)
3667              {              {
3668              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
3669              break;              case OP_ALLANY:
   
3670              case OP_ANYBYTE:              case OP_ANYBYTE:
3671              break;              break;
3672    
# Line 3718  for (;;) Line 3799  for (;;)
3799              int len = 1;              int len = 1;
3800              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3801              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3802              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3803              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3804                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3805                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3733  for (;;) Line 3814  for (;;)
3814              int len = 1;              int len = 1;
3815              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3816              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3817              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3818              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3819                break;                break;
3820              eptr+= len;              eptr+= len;
# Line 3746  for (;;) Line 3827  for (;;)
3827              int len = 1;              int len = 1;
3828              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3829              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3830              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3831              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3832                break;                break;
3833              eptr+= len;              eptr+= len;
# Line 3759  for (;;) Line 3840  for (;;)
3840              int len = 1;              int len = 1;
3841              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3842              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3843              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3844              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3845                break;                break;
3846              eptr+= len;              eptr+= len;
# Line 3788  for (;;) Line 3869  for (;;)
3869            {            {
3870            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3871            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3872            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3873            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3874            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3875              {              {
# Line 3797  for (;;) Line 3878  for (;;)
3878                {                {
3879                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3880                }                }
3881              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3882              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3883              eptr += len;              eptr += len;
3884              }              }
# Line 3819  for (;;) Line 3900  for (;;)
3900                BACKCHAR(eptr);                BACKCHAR(eptr);
3901                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3902                }                }
3903              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3904              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3905              eptr--;              eptr--;
3906              }              }
# Line 3839  for (;;) Line 3920  for (;;)
3920            case OP_ANY:            case OP_ANY:
3921            if (max < INT_MAX)            if (max < INT_MAX)
3922              {              {
3923              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
3924                {                {
3925                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3926                  {                eptr++;
3927                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3928                }                }
3929              }              }
3930    
# Line 3863  for (;;) Line 3932  for (;;)
3932    
3933            else            else
3934              {              {
3935              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
3936                {                {
3937                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3938                  {                eptr++;
3939                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3940                }                }
3941              else              }
3942              break;
3943    
3944              case OP_ALLANY:
3945              if (max < INT_MAX)
3946                {
3947                for (i = min; i < max; i++)
3948                {                {
3949                eptr = md->end_subject;                if (eptr >= md->end_subject) break;
3950                  eptr++;
3951                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3952                }                }
3953              }              }
3954              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3955            break;            break;
3956    
3957            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 4064  for (;;) Line 4139  for (;;)
4139          switch(ctype)          switch(ctype)
4140            {            {
4141            case OP_ANY:            case OP_ANY:
4142            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4143              {              {
4144              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4145                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4146              }              }
4147            /* For DOTALL case, fall through and treat as \C */            break;
4148    
4149              case OP_ALLANY:
4150            case OP_ANYBYTE:            case OP_ANYBYTE:
4151            c = max - min;            c = max - min;
4152            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4246  HEAP_RETURN: Line 4318  HEAP_RETURN:
4318  switch (frame->Xwhere)  switch (frame->Xwhere)
4319    {    {
4320    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4321    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4322    LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4323    LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4324    LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)    LBL(53) LBL(54)
4325    LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47) LBL(48)  #ifdef SUPPORT_UTF8
4326    LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4327      LBL(32) LBL(34) LBL(42) LBL(46)
4328    #ifdef SUPPORT_UCP
4329      LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4330    #endif  /* SUPPORT_UCP */
4331    #endif  /* SUPPORT_UTF8 */
4332    default:    default:
4333    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4334    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 4343  Returns:          > 0 => success; value Line 4420  Returns:          > 0 => success; value
4420                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4421  */  */
4422    
4423  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4424  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4425    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4426    int offsetcount)    int offsetcount)
# Line 4445  end_subject = md->end_subject; Line 4522  end_subject = md->end_subject;
4522    
4523  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4524  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4525    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4526    
4527  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4528  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 4469  switch (options & (PCRE_BSR_ANYCRLF|PCRE Line 4547  switch (options & (PCRE_BSR_ANYCRLF|PCRE
4547    md->bsr_anycrlf = TRUE;    md->bsr_anycrlf = TRUE;
4548  #else  #else
4549    md->bsr_anycrlf = FALSE;    md->bsr_anycrlf = FALSE;
4550  #endif  #endif
4551    break;    break;
4552    
4553    case PCRE_BSR_ANYCRLF:    case PCRE_BSR_ANYCRLF:
# Line 4490  switch ((((options & PCRE_NEWLINE_BITS) Line 4568  switch ((((options & PCRE_NEWLINE_BITS)
4568          (pcre_uint32)options) & PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4569    {    {
4570    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
4571    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
4572    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
4573    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
4574         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
4575    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
4576    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4577    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
# Line 4645  for(;;) Line 4723  for(;;)
4723      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
4724      }      }
4725    
4726    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* If firstline is TRUE, the start of the match is constrained to the first
4727    start of the match is constrained to the first line of a multiline string.    line of a multiline string. That is, the match must be before or at the first
4728    That is, the match must be before or at the first newline. Implement this by    newline. Implement this by temporarily adjusting end_subject so that we stop
4729    temporarily adjusting end_subject so that we stop scanning at a newline. If    scanning at a newline. If the match fails at the newline, later code breaks
4730    the match fails at the newline, later code breaks this loop. */    this loop. */
4731    
4732    if (firstline)    if (firstline)
4733      {      {
4734      USPTR t = start_match;      USPTR t = start_match;
4735    #ifdef SUPPORT_UTF8
4736        if (utf8)
4737          {
4738          while (t < md->end_subject && !IS_NEWLINE(t))
4739            {
4740            t++;
4741            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
4742            }
4743          }
4744        else
4745    #endif
4746      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4747      end_subject = t;      end_subject = t;
4748      }      }
4749    
4750    /* Now test for a unique first byte */    /* There are some optimizations that avoid running the match if a known
4751      starting point is not found, or if a known later character is not present.
4752      However, there is an option that disables these, for testing and for ensuring
4753      that all callouts do actually occur. */
4754    
4755    if (first_byte >= 0)    if ((options & PCRE_NO_START_OPTIMIZE) == 0)
4756      {      {
4757      if (first_byte_caseless)      /* Advance to a unique first byte if there is one. */
4758        while (start_match < end_subject &&  
4759               md->lcc[*start_match] != first_byte)      if (first_byte >= 0)
4760          start_match++;        {
4761      else        if (first_byte_caseless)
4762        while (start_match < end_subject && *start_match != first_byte)          while (start_match < end_subject && md->lcc[*start_match] != first_byte)
4763          start_match++;            start_match++;
4764      }        else
4765            while (start_match < end_subject && *start_match != first_byte)
4766              start_match++;
4767          }
4768    
4769    /* Or to just after a linebreak for a multiline match if possible */      /* Or to just after a linebreak for a multiline match */
4770    
4771    else if (startline)      else if (startline)
     {  
     if (start_match > md->start_subject + start_offset)  
4772        {        {
4773        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        if (start_match > md->start_subject + start_offset)
4774          start_match++;          {
4775    #ifdef SUPPORT_UTF8
4776            if (utf8)
4777              {
4778              while (start_match < end_subject && !WAS_NEWLINE(start_match))
4779                {
4780                start_match++;
4781                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4782                  start_match++;
4783                }
4784              }
4785            else
4786    #endif
4787            while (start_match < end_subject && !WAS_NEWLINE(start_match))
4788              start_match++;
4789    
4790        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4791        and we are now at a LF, advance the match position by one more character.          and we are now at a LF, advance the match position by one more character.
4792        */          */
4793    
4794        if (start_match[-1] == '\r' &&          if (start_match[-1] == CHAR_CR &&
4795             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4796             start_match < end_subject &&               start_match < end_subject &&
4797             *start_match == '\n')               *start_match == CHAR_NL)
4798          start_match++;            start_match++;
4799            }
4800        }        }
     }  
4801    
4802    /* Or to a non-unique first char after study */      /* Or to a non-unique first byte after study */
4803    
4804    else if (start_bits != NULL)      else if (start_bits != NULL)
     {  
     while (start_match < end_subject)  
4805        {        {
4806        register unsigned int c = *start_match;        while (start_match < end_subject)
4807        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;          {
4808            register unsigned int c = *start_match;
4809            if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4810              else break;
4811            }
4812        }        }
4813      }      }   /* Starting optimizations */
4814    
4815    /* Restore fudged end_subject */    /* Restore fudged end_subject */
4816    
# Line 4713  for(;;) Line 4822  for(;;)
4822    printf("\n");    printf("\n");
4823  #endif  #endif
4824    
4825    /* If req_byte is set, we know that that character must appear in the subject    /* If req_byte is set, we know that that character must appear in the
4826    for the match to succeed. If the first character is set, req_byte must be    subject for the match to succeed. If the first character is set, req_byte
4827    later in the subject; otherwise the test starts at the match point. This    must be later in the subject; otherwise the test starts at the match point.
4828    optimization can save a huge amount of backtracking in patterns with nested    This optimization can save a huge amount of backtracking in patterns with
4829    unlimited repeats that aren't going to match. Writing separate code for    nested unlimited repeats that aren't going to match. Writing separate code
4830    cased/caseless versions makes it go faster, as does using an autoincrement    for cased/caseless versions makes it go faster, as does using an
4831    and backing off on a match.    autoincrement and backing off on a match.
4832    
4833    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end
4834    take a long time, and give bad performance on quite ordinary patterns. This    can take a long time, and give bad performance on quite ordinary patterns.
4835    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte    This showed up when somebody was matching something like /^\d+C/ on a
4836    string... so we don't do this when the string is sufficiently long.    32-megabyte string... so we don't do this when the string is sufficiently
4837      long.
4838    
4839    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, or if
4840    */    disabling is explicitly requested. */
4841    
4842    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
4843          req_byte >= 0 &&
4844        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4845        !md->partial)        !md->partial)
4846      {      {
# Line 4837  for(;;) Line 4948  for(;;)
4948    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
4949    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
4950    
4951    if (start_match[-1] == '\r' &&    if (start_match[-1] == CHAR_CR &&
4952        start_match < end_subject &&        start_match < end_subject &&
4953        *start_match == '\n' &&        *start_match == CHAR_NL &&
4954        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
4955          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
4956           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||

Legend:
Removed from v.236  
changed lines
  Added in v.399

  ViewVC Help
Powered by ViewVC 1.1.5