/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 214 by ph10, Wed Aug 15 14:08:10 2007 UTC | revision 381 by ph10, Tue Mar 3 16:08:23 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 43  pattern matching using an NFA algorithm, Line 43  pattern matching using an NFA algorithm,
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
46  #include <config.h>  #include "config.h"
47  #endif  #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
# Line 158  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 761  for (;;) Line 787  for (;;)
787    
788      case OP_COND:      case OP_COND:
789      case OP_SCOND:      case OP_SCOND:
790        /* Because of the way auto-callout works during compile, a callout item is
791        inserted between OP_COND and an assertion condition. */
792    
793        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
794          {
795          if (pcre_callout != NULL)
796            {
797            pcre_callout_block cb;
798            cb.version          = 1;   /* Version 1 of the callout block */
799            cb.callout_number   = ecode[LINK_SIZE+2];
800            cb.offset_vector    = md->offset_vector;
801            cb.subject          = (PCRE_SPTR)md->start_subject;
802            cb.subject_length   = md->end_subject - md->start_subject;
803            cb.start_match      = mstart - md->start_subject;
804            cb.current_position = eptr - md->start_subject;
805            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
806            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
807            cb.capture_top      = offset_top/2;
808            cb.capture_last     = md->capture_last;
809            cb.callout_data     = md->callout_data;
810            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
811            if (rrc < 0) RRETURN(rrc);
812            }
813          ecode += _pcre_OP_lengths[OP_CALLOUT];
814          }
815    
816        /* Now see what the actual condition is */
817    
818      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
819        {        {
820        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
# Line 1148  for (;;) Line 1202  for (;;)
1202      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1203      break;      break;
1204    
1205      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1206      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1207      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1208      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1209      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1210    
1211      case OP_BRAZERO:      case OP_BRAZERO:
1212        {        {
# Line 1174  for (;;) Line 1228  for (;;)
1228        }        }
1229      break;      break;
1230    
1231        case OP_SKIPZERO:
1232          {
1233          next = ecode+1;
1234          do next += GET(next,1); while (*next == OP_ALT);
1235          ecode = next + 1 + LINK_SIZE;
1236          }
1237        break;
1238    
1239      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1240    
1241      case OP_KET:      case OP_KET:
# Line 1421  for (;;) Line 1483  for (;;)
1483      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1484    
1485      case OP_ANY:      case OP_ANY:
1486      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1487        {      /* Fall through */
1488        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1489        }      case OP_ALLANY:
1490      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1491      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1492      ecode++;      ecode++;
1493      break;      break;
1494    
# Line 1526  for (;;) Line 1587  for (;;)
1587        case 0x000d:        case 0x000d:
1588        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1589        break;        break;
1590    
1591        case 0x000a:        case 0x000a:
1592          break;
1593    
1594        case 0x000b:        case 0x000b:
1595        case 0x000c:        case 0x000c:
1596        case 0x0085:        case 0x0085:
1597        case 0x2028:        case 0x2028:
1598        case 0x2029:        case 0x2029:
1599          if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1600        break;        break;
1601        }        }
1602      ecode++;      ecode++;
# Line 1642  for (;;) Line 1707  for (;;)
1707      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1708      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1709        {        {
1710        int chartype, script;        const ucd_record * prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1711    
1712        switch(ecode[1])        switch(ecode[1])
1713          {          {
# Line 1652  for (;;) Line 1716  for (;;)
1716          break;          break;
1717    
1718          case PT_LAMP:          case PT_LAMP:
1719          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1720               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1721               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1722            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1723           break;           break;
1724    
1725          case PT_GC:          case PT_GC:
1726          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1727            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1728          break;          break;
1729    
1730          case PT_PC:          case PT_PC:
1731          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1732            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1733          break;          break;
1734    
1735          case PT_SC:          case PT_SC:
1736          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1737            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1738          break;          break;
1739    
# Line 1688  for (;;) Line 1752  for (;;)
1752      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1753      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1754        {        {
1755        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1756        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1757        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1758          {          {
# Line 1698  for (;;) Line 1761  for (;;)
1761            {            {
1762            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1763            }            }
1764          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1765          if (category != ucp_M) break;          if (category != ucp_M) break;
1766          eptr += len;          eptr += len;
1767          }          }
# Line 1719  for (;;) Line 1782  for (;;)
1782      case OP_REF:      case OP_REF:
1783        {        {
1784        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1785        ecode += 3;                                 /* Advance past item */        ecode += 3;
1786    
1787        /* If the reference is unset, set the length to be longer than the amount        /* If the reference is unset, there are two possibilities:
1788        of subject left; this ensures that every attempt at a match fails. We  
1789        can't just fail here, because of the possibility of quantifiers with zero        (a) In the default, Perl-compatible state, set the length to be longer
1790        minima. */        than the amount of subject left; this ensures that every attempt at a
1791          match fails. We can't just fail here, because of the possibility of
1792        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        quantifiers with zero minima.
1793          md->end_subject - eptr + 1 :  
1794          md->offset_vector[offset+1] - md->offset_vector[offset];        (b) If the JavaScript compatibility flag is set, set the length to zero
1795          so that the back reference matches an empty string.
1796    
1797          Otherwise, set the length to the length of what was matched by the
1798          referenced subpattern. */
1799    
1800          if (offset >= offset_top || md->offset_vector[offset] < 0)
1801            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1802          else
1803            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1804    
1805        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1806    
# Line 2154  for (;;) Line 2226  for (;;)
2226          if (fc != dc)          if (fc != dc)
2227            {            {
2228  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2229            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2230  #endif  #endif
2231              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2232            }            }
# Line 2245  for (;;) Line 2317  for (;;)
2317  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2318          unsigned int othercase;          unsigned int othercase;
2319          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2320              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2321            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2322          else oclength = 0;          else oclength = 0;
2323  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2565  for (;;) Line 2637  for (;;)
2637              {              {
2638              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2639              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2640                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2641              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2642              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2643              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
2644                RRETURN(MATCH_NOMATCH);  
2645              }              }
2646            }            }
2647          else          else
# Line 2674  for (;;) Line 2747  for (;;)
2747              {              {
2748              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2749              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2750                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2751              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2752              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2753              }              }
2754            }            }
2755          else          else
# Line 2850  for (;;) Line 2923  for (;;)
2923              {              {
2924              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2925              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2926              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2927              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2928                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
2929                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2863  for (;;) Line 2936  for (;;)
2936              {              {
2937              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2938              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2939              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2940              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2941                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2942              }              }
# Line 2874  for (;;) Line 2947  for (;;)
2947              {              {
2948              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2949              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2950              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2951              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2952                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2953              }              }
# Line 2885  for (;;) Line 2958  for (;;)
2958              {              {
2959              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2960              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2961              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
2962              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2963                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2964              }              }
# Line 2904  for (;;) Line 2977  for (;;)
2977          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2978            {            {
2979            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2980            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
2981            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2982            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2983              {              {
# Line 2913  for (;;) Line 2986  for (;;)
2986                {                {
2987                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2988                }                }
2989              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2990              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2991              eptr += len;              eptr += len;
2992              }              }
# Line 2931  for (;;) Line 3004  for (;;)
3004          case OP_ANY:          case OP_ANY:
3005          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3006            {            {
3007            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))  
3008              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3009            eptr++;            eptr++;
3010            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3011            }            }
3012          break;          break;
3013    
3014            case OP_ALLANY:
3015            for (i = 1; i <= min; i++)
3016              {
3017              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3018              eptr++;
3019              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3020              }
3021            break;
3022    
3023          case OP_ANYBYTE:          case OP_ANYBYTE:
3024          eptr += min;          eptr += min;
3025          break;          break;
# Line 2954  for (;;) Line 3035  for (;;)
3035              case 0x000d:              case 0x000d:
3036              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3037              break;              break;
3038    
3039              case 0x000a:              case 0x000a:
3040                break;
3041    
3042              case 0x000b:              case 0x000b:
3043              case 0x000c:              case 0x000c:
3044              case 0x0085:              case 0x0085:
3045              case 0x2028:              case 0x2028:
3046              case 0x2029:              case 0x2029:
3047                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3048              break;              break;
3049              }              }
3050            }            }
# Line 3093  for (;;) Line 3178  for (;;)
3178          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3179            {            {
3180            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3181               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
3182              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3183            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3184            }            }
3185          break;          break;
3186    
# Line 3113  for (;;) Line 3198  for (;;)
3198          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3199            {            {
3200            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3201               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3202              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3203            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3204            }            }
3205          break;          break;
3206    
# Line 3143  for (;;) Line 3228  for (;;)
3228        switch(ctype)        switch(ctype)
3229          {          {
3230          case OP_ANY:          case OP_ANY:
3231          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3232            {            {
3233            for (i = 1; i <= min; i++)            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3234              {            eptr++;
             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
             eptr++;  
             }  
3235            }            }
3236          else eptr += min;          break;
3237    
3238            case OP_ALLANY:
3239            eptr += min;
3240          break;          break;
3241    
3242          case OP_ANYBYTE:          case OP_ANYBYTE:
# Line 3172  for (;;) Line 3257  for (;;)
3257              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3258              break;              break;
3259              case 0x000a:              case 0x000a:
3260                break;
3261    
3262              case 0x000b:              case 0x000b:
3263              case 0x000c:              case 0x000c:
3264              case 0x0085:              case 0x0085:
3265                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3266              break;              break;
3267              }              }
3268            }            }
# Line 3314  for (;;) Line 3402  for (;;)
3402              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3403              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3404              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3405              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3406              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3407                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3408                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3329  for (;;) Line 3417  for (;;)
3417              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3418              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3419              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3420              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3421              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3422                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3423              }              }
# Line 3342  for (;;) Line 3430  for (;;)
3430              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3431              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3432              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3433              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3434              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3435                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3436              }              }
# Line 3355  for (;;) Line 3443  for (;;)
3443              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3444              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3445              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3446              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3447              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3448                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3449              }              }
# Line 3377  for (;;) Line 3465  for (;;)
3465            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3466            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3467            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3468            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3469            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3470            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3471              {              {
# Line 3386  for (;;) Line 3474  for (;;)
3474                {                {
3475                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3476                }                }
3477              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3478              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3479              eptr += len;              eptr += len;
3480              }              }
# Line 3405  for (;;) Line 3493  for (;;)
3493            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3494            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3495            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3496                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
                 IS_NEWLINE(eptr)))  
3497              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3498    
3499            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3500            switch(ctype)            switch(ctype)
3501              {              {
3502              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3503              break;              case OP_ALLANY:
   
3504              case OP_ANYBYTE:              case OP_ANYBYTE:
3505              break;              break;
3506    
# Line 3426  for (;;) Line 3512  for (;;)
3512                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3513                break;                break;
3514                case 0x000a:                case 0x000a:
3515                  break;
3516    
3517                case 0x000b:                case 0x000b:
3518                case 0x000c:                case 0x000c:
3519                case 0x0085:                case 0x0085:
3520                case 0x2028:                case 0x2028:
3521                case 0x2029:                case 0x2029:
3522                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3523                break;                break;
3524                }                }
3525              break;              break;
# Line 3563  for (;;) Line 3652  for (;;)
3652            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3653            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3654            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3655                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3656              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3657    
3658            c = *eptr++;            c = *eptr++;
3659            switch(ctype)            switch(ctype)
3660              {              {
3661              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
3662              break;              case OP_ALLANY:
   
3663              case OP_ANYBYTE:              case OP_ANYBYTE:
3664              break;              break;
3665    
# Line 3582  for (;;) Line 3670  for (;;)
3670                case 0x000d:                case 0x000d:
3671                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3672                break;                break;
3673    
3674                case 0x000a:                case 0x000a:
3675                  break;
3676    
3677                case 0x000b:                case 0x000b:
3678                case 0x000c:                case 0x000c:
3679                case 0x0085:                case 0x0085:
3680                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3681                break;                break;
3682                }                }
3683              break;              break;
# Line 3700  for (;;) Line 3792  for (;;)
3792              int len = 1;              int len = 1;
3793              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3794              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3795              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3796              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3797                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3798                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3715  for (;;) Line 3807  for (;;)
3807              int len = 1;              int len = 1;
3808              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3809              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3810              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3811              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3812                break;                break;
3813              eptr+= len;              eptr+= len;
# Line 3728  for (;;) Line 3820  for (;;)
3820              int len = 1;              int len = 1;
3821              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3822              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3823              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3824              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3825                break;                break;
3826              eptr+= len;              eptr+= len;
# Line 3741  for (;;) Line 3833  for (;;)
3833              int len = 1;              int len = 1;
3834              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3835              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3836              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3837              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3838                break;                break;
3839              eptr+= len;              eptr+= len;
# Line 3770  for (;;) Line 3862  for (;;)
3862            {            {
3863            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3864            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3865            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3866            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3867            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3868              {              {
# Line 3779  for (;;) Line 3871  for (;;)
3871                {                {
3872                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3873                }                }
3874              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3875              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3876              eptr += len;              eptr += len;
3877              }              }
# Line 3801  for (;;) Line 3893  for (;;)
3893                BACKCHAR(eptr);                BACKCHAR(eptr);
3894                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3895                }                }
3896              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3897              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3898              eptr--;              eptr--;
3899              }              }
# Line 3821  for (;;) Line 3913  for (;;)
3913            case OP_ANY:            case OP_ANY:
3914            if (max < INT_MAX)            if (max < INT_MAX)
3915              {              {
3916              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
3917                {                {
3918                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3919                  {                eptr++;
3920                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3921                }                }
3922              }              }
3923    
# Line 3845  for (;;) Line 3925  for (;;)
3925    
3926            else            else
3927              {              {
3928              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
3929                {                {
3930                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3931                  {                eptr++;
3932                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3933                }                }
3934              else              }
3935              break;
3936    
3937              case OP_ALLANY:
3938              if (max < INT_MAX)
3939                {
3940                for (i = min; i < max; i++)
3941                {                {
3942                eptr = md->end_subject;                if (eptr >= md->end_subject) break;
3943                  eptr++;
3944                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3945                }                }
3946              }              }
3947              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3948            break;            break;
3949    
3950            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 3883  for (;;) Line 3969  for (;;)
3969                }                }
3970              else              else
3971                {                {
3972                if (c != 0x000a && c != 0x000b && c != 0x000c &&                if (c != 0x000a &&
3973                    c != 0x0085 && c != 0x2028 && c != 0x2029)                    (md->bsr_anycrlf ||
3974                       (c != 0x000b && c != 0x000c &&
3975                        c != 0x0085 && c != 0x2028 && c != 0x2029)))
3976                  break;                  break;
3977                eptr += len;                eptr += len;
3978                }                }
# Line 4044  for (;;) Line 4132  for (;;)
4132          switch(ctype)          switch(ctype)
4133            {            {
4134            case OP_ANY:            case OP_ANY:
4135            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4136              {              {
4137              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4138                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4139              }              }
4140            /* For DOTALL case, fall through and treat as \C */            break;
4141    
4142              case OP_ALLANY:
4143            case OP_ANYBYTE:            case OP_ANYBYTE:
4144            c = max - min;            c = max - min;
4145            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4074  for (;;) Line 4159  for (;;)
4159                }                }
4160              else              else
4161                {                {
4162                if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)                if (c != 0x000a &&
4163                      (md->bsr_anycrlf ||
4164                        (c != 0x000b && c != 0x000c && c != 0x0085)))
4165                  break;                  break;
4166                eptr++;                eptr++;
4167                }                }
# Line 4224  HEAP_RETURN: Line 4311  HEAP_RETURN:
4311  switch (frame->Xwhere)  switch (frame->Xwhere)
4312    {    {
4313    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4314    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4315    LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4316    LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4317    LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)    LBL(53) LBL(54)
4318    LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47) LBL(48)  #ifdef SUPPORT_UTF8
4319    LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4320      LBL(32) LBL(34) LBL(42) LBL(46)
4321    #ifdef SUPPORT_UCP
4322      LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4323    #endif  /* SUPPORT_UCP */
4324    #endif  /* SUPPORT_UTF8 */
4325    default:    default:
4326    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4327    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 4321  Returns:          > 0 => success; value Line 4413  Returns:          > 0 => success; value
4413                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4414  */  */
4415    
4416  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4417  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4418    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4419    int offsetcount)    int offsetcount)
# Line 4408  if (re->magic_number != MAGIC_NUMBER) Line 4500  if (re->magic_number != MAGIC_NUMBER)
4500  /* Set up other data */  /* Set up other data */
4501    
4502  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4503  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
4504  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
4505    
4506  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
# Line 4423  end_subject = md->end_subject; Line 4515  end_subject = md->end_subject;
4515    
4516  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4517  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4518    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4519    
4520  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4521  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 4435  md->recursive = NULL; Line 4528  md->recursive = NULL;
4528  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4529  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
4530    
4531    /* Handle different \R options. */
4532    
4533    switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
4534      {
4535      case 0:
4536      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
4537        md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
4538      else
4539    #ifdef BSR_ANYCRLF
4540      md->bsr_anycrlf = TRUE;
4541    #else
4542      md->bsr_anycrlf = FALSE;
4543    #endif
4544      break;
4545    
4546      case PCRE_BSR_ANYCRLF:
4547      md->bsr_anycrlf = TRUE;
4548      break;
4549    
4550      case PCRE_BSR_UNICODE:
4551      md->bsr_anycrlf = FALSE;
4552      break;
4553    
4554      default: return PCRE_ERROR_BADNEWLINE;
4555      }
4556    
4557  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
4558  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
4559    
4560  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
4561         PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4562    {    {
4563    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
4564    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
# Line 4478  else Line 4597  else
4597  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
4598  moment. */  moment. */
4599    
4600  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
4601    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
4602    
4603  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 4555  studied, there may be a bitmap of possib Line 4674  studied, there may be a bitmap of possib
4674    
4675  if (!anchored)  if (!anchored)
4676    {    {
4677    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
4678      {      {
4679      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
4680      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 4570  if (!anchored) Line 4689  if (!anchored)
4689  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
4690  character" set. */  character" set. */
4691    
4692  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
4693    {    {
4694    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
4695    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 4606  for(;;) Line 4725  for(;;)
4725    if (firstline)    if (firstline)
4726      {      {
4727      USPTR t = start_match;      USPTR t = start_match;
4728    #ifdef SUPPORT_UTF8
4729        if (utf8)
4730          {
4731          while (t < md->end_subject && !IS_NEWLINE(t))
4732            {
4733            t++;
4734            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
4735            }
4736          }
4737        else
4738    #endif
4739      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4740      end_subject = t;      end_subject = t;
4741      }      }
4742    
4743    /* Now test for a unique first byte */    /* Now advance to a unique first byte if there is one. */
4744    
4745    if (first_byte >= 0)    if (first_byte >= 0)
4746      {      {
4747      if (first_byte_caseless)      if (first_byte_caseless)
4748        while (start_match < end_subject &&        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
              md->lcc[*start_match] != first_byte)  
4749          start_match++;          start_match++;
4750      else      else
4751        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4752          start_match++;          start_match++;
4753      }      }
4754    
4755    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match */
4756    
4757    else if (startline)    else if (startline)
4758      {      {
4759      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4760        {        {
4761        while (start_match <= end_subject && !WAS_NEWLINE(start_match))  #ifdef SUPPORT_UTF8
4762          if (utf8)
4763            {
4764            while (start_match < end_subject && !WAS_NEWLINE(start_match))
4765              {
4766              start_match++;
4767              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4768                start_match++;
4769              }
4770            }
4771          else
4772    #endif
4773          while (start_match < end_subject && !WAS_NEWLINE(start_match))
4774          start_match++;          start_match++;
4775    
4776        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
# Line 4644  for(;;) Line 4785  for(;;)
4785        }        }
4786      }      }
4787    
4788    /* Or to a non-unique first char after study */    /* Or to a non-unique first byte after study */
4789    
4790    else if (start_bits != NULL)    else if (start_bits != NULL)
4791      {      {
4792      while (start_match < end_subject)      while (start_match < end_subject)
4793        {        {
4794        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4795        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4796            else break;
4797        }        }
4798      }      }
4799    
# Line 4785  for(;;) Line 4927  for(;;)
4927    
4928    if (anchored || start_match > end_subject) break;    if (anchored || start_match > end_subject) break;
4929    
4930    /* If we have just passed a CR and the newline option is CRLF or ANY or    /* If we have just passed a CR and we are now at a LF, and the pattern does
4931    ANYCRLF, and we are now at a LF, advance the match position by one more    not contain any explicit matches for \r or \n, and the newline option is CRLF
4932    character. */    or ANY or ANYCRLF, advance the match position by one more character. */
4933    
4934    if (start_match[-1] == '\r' &&    if (start_match[-1] == '\r' &&
4935         (md->nltype == NLTYPE_ANY ||        start_match < end_subject &&
4936          md->nltype == NLTYPE_ANYCRLF ||        *start_match == '\n' &&
4937          md->nllen == 2) &&        (re->flags & PCRE_HASCRORLF) == 0 &&
4938         start_match < end_subject &&          (md->nltype == NLTYPE_ANY ||
4939         *start_match == '\n')           md->nltype == NLTYPE_ANYCRLF ||
4940             md->nllen == 2))
4941      start_match++;      start_match++;
4942    
4943    }   /* End of for(;;) "bumpalong" loop */    }   /* End of for(;;) "bumpalong" loop */

Legend:
Removed from v.214  
changed lines
  Added in v.381

  ViewVC Help
Powered by ViewVC 1.1.5