/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 210 by ph10, Wed Aug 8 14:24:50 2007 UTC | revision 349 by ph10, Wed Jul 2 18:42:11 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 43  pattern matching using an NFA algorithm, Line 43  pattern matching using an NFA algorithm,
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
46  #include <config.h>  #include "config.h"
47  #endif  #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71  /* Special internal returns from the match() function. Make them sufficiently  /* Special internal returns from the match() function. Make them sufficiently
72  negative to avoid the external error codes. */  negative to avoid the external error codes. */
73    
74  #define MATCH_COMMIT       (-999)  #define MATCH_COMMIT       (-999)
# Line 211  variable instead of being passed in the Line 211  variable instead of being passed in the
211  ****************************************************************************  ****************************************************************************
212  ***************************************************************************/  ***************************************************************************/
213    
214    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
215  /* Numbers for RMATCH calls */  below must be updated in sync.  */
216    
217  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
218         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
219         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
220         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
221         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
222         RM51,  RM52, RM53 };         RM51,  RM52, RM53, RM54 };
   
223    
224  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
225  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 622  for (;;) Line 621  for (;;)
621    switch(op)    switch(op)
622      {      {
623      case OP_FAIL:      case OP_FAIL:
624      return MATCH_NOMATCH;      RRETURN(MATCH_NOMATCH);
625    
626      case OP_PRUNE:      case OP_PRUNE:
627      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
628        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM51);
629      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
630      return MATCH_PRUNE;      RRETURN(MATCH_PRUNE);
631    
632      case OP_COMMIT:      case OP_COMMIT:
633      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
634        ims, eptrb, flags, RM52);        ims, eptrb, flags, RM52);
635      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
636      return MATCH_COMMIT;      RRETURN(MATCH_COMMIT);
637    
638      case OP_SKIP:      case OP_SKIP:
639      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
640        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM53);
641      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
642      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
643      return MATCH_SKIP;      RRETURN(MATCH_SKIP);
644    
645      case OP_THEN:      case OP_THEN:
646      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
647        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM54);
648      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
649      return MATCH_THEN;      RRETURN(MATCH_THEN);
650    
651      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
652      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
653      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 1149  for (;;) Line 1148  for (;;)
1148      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1149      break;      break;
1150    
1151      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1152      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1153      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1154      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1155      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1156    
1157      case OP_BRAZERO:      case OP_BRAZERO:
1158        {        {
# Line 1175  for (;;) Line 1174  for (;;)
1174        }        }
1175      break;      break;
1176    
1177        case OP_SKIPZERO:
1178          {
1179          next = ecode+1;
1180          do next += GET(next,1); while (*next == OP_ALT);
1181          ecode = next + 1 + LINK_SIZE;
1182          }
1183        break;
1184    
1185      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1186    
1187      case OP_KET:      case OP_KET:
# Line 1422  for (;;) Line 1429  for (;;)
1429      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1430    
1431      case OP_ANY:      case OP_ANY:
1432      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1433        {      /* Fall through */
1434        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1435        }      case OP_ALLANY:
1436      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1437      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1438      ecode++;      ecode++;
1439      break;      break;
1440    
# Line 1527  for (;;) Line 1533  for (;;)
1533        case 0x000d:        case 0x000d:
1534        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1535        break;        break;
1536    
1537        case 0x000a:        case 0x000a:
1538          break;
1539    
1540        case 0x000b:        case 0x000b:
1541        case 0x000c:        case 0x000c:
1542        case 0x0085:        case 0x0085:
1543        case 0x2028:        case 0x2028:
1544        case 0x2029:        case 0x2029:
1545          if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1546        break;        break;
1547        }        }
1548      ecode++;      ecode++;
# Line 1643  for (;;) Line 1653  for (;;)
1653      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1654      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1655        {        {
1656        int chartype, script;        const ucd_record * prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1657    
1658        switch(ecode[1])        switch(ecode[1])
1659          {          {
# Line 1653  for (;;) Line 1662  for (;;)
1662          break;          break;
1663    
1664          case PT_LAMP:          case PT_LAMP:
1665          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1666               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1667               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1668            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1669           break;           break;
1670    
1671          case PT_GC:          case PT_GC:
1672          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != ucp_gentype[prop->chartype]) == (op == OP_PROP))
1673            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1674          break;          break;
1675    
1676          case PT_PC:          case PT_PC:
1677          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1678            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1679          break;          break;
1680    
1681          case PT_SC:          case PT_SC:
1682          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1683            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1684          break;          break;
1685    
# Line 1689  for (;;) Line 1698  for (;;)
1698      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1699      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1700        {        {
1701        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1702        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1703        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1704          {          {
# Line 1699  for (;;) Line 1707  for (;;)
1707            {            {
1708            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1709            }            }
1710          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1711          if (category != ucp_M) break;          if (category != ucp_M) break;
1712          eptr += len;          eptr += len;
1713          }          }
# Line 1720  for (;;) Line 1728  for (;;)
1728      case OP_REF:      case OP_REF:
1729        {        {
1730        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1731        ecode += 3;                                 /* Advance past item */        ecode += 3;
1732    
1733        /* If the reference is unset, set the length to be longer than the amount        /* If the reference is unset, there are two possibilities:
1734        of subject left; this ensures that every attempt at a match fails. We  
1735        can't just fail here, because of the possibility of quantifiers with zero        (a) In the default, Perl-compatible state, set the length to be longer
1736        minima. */        than the amount of subject left; this ensures that every attempt at a
1737          match fails. We can't just fail here, because of the possibility of
1738        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        quantifiers with zero minima.
1739          md->end_subject - eptr + 1 :  
1740          md->offset_vector[offset+1] - md->offset_vector[offset];        (b) If the JavaScript compatibility flag is set, set the length to zero
1741          so that the back reference matches an empty string.
1742    
1743          Otherwise, set the length to the length of what was matched by the
1744          referenced subpattern. */
1745    
1746          if (offset >= offset_top || md->offset_vector[offset] < 0)
1747            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1748          else
1749            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1750    
1751        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1752    
# Line 2089  for (;;) Line 2106  for (;;)
2106            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2107            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2108            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2109            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
2110            }            }
2111          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2112          }          }
# Line 2155  for (;;) Line 2172  for (;;)
2172          if (fc != dc)          if (fc != dc)
2173            {            {
2174  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2175            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2176  #endif  #endif
2177              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2178            }            }
# Line 2246  for (;;) Line 2263  for (;;)
2263  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2264          unsigned int othercase;          unsigned int othercase;
2265          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2266              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2267            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2268          else oclength = 0;          else oclength = 0;
2269  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2851  for (;;) Line 2868  for (;;)
2868              {              {
2869              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2870              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2871              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2872              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2873                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
2874                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2864  for (;;) Line 2881  for (;;)
2881              {              {
2882              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2883              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2884              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2885              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2886                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2887              }              }
# Line 2875  for (;;) Line 2892  for (;;)
2892              {              {
2893              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2894              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2895              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2896              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2897                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2898              }              }
# Line 2886  for (;;) Line 2903  for (;;)
2903              {              {
2904              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2905              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2906              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
2907              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2908                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2909              }              }
# Line 2905  for (;;) Line 2922  for (;;)
2922          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2923            {            {
2924            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2925            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
2926            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2927            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2928              {              {
# Line 2914  for (;;) Line 2931  for (;;)
2931                {                {
2932                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2933                }                }
2934              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2935              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2936              eptr += len;              eptr += len;
2937              }              }
# Line 2932  for (;;) Line 2949  for (;;)
2949          case OP_ANY:          case OP_ANY:
2950          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2951            {            {
2952            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))  
2953              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2954            eptr++;            eptr++;
2955            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2956            }            }
2957          break;          break;
2958    
2959            case OP_ALLANY:
2960            for (i = 1; i <= min; i++)
2961              {
2962              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2963              eptr++;
2964              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2965              }
2966            break;
2967    
2968          case OP_ANYBYTE:          case OP_ANYBYTE:
2969          eptr += min;          eptr += min;
2970          break;          break;
# Line 2955  for (;;) Line 2980  for (;;)
2980              case 0x000d:              case 0x000d:
2981              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2982              break;              break;
2983    
2984              case 0x000a:              case 0x000a:
2985                break;
2986    
2987              case 0x000b:              case 0x000b:
2988              case 0x000c:              case 0x000c:
2989              case 0x0085:              case 0x0085:
2990              case 0x2028:              case 0x2028:
2991              case 0x2029:              case 0x2029:
2992                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2993              break;              break;
2994              }              }
2995            }            }
# Line 3094  for (;;) Line 3123  for (;;)
3123          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3124            {            {
3125            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3126               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
3127              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3128            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3129            }            }
3130          break;          break;
3131    
# Line 3114  for (;;) Line 3143  for (;;)
3143          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3144            {            {
3145            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3146               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3147              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3148            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3149            }            }
3150          break;          break;
3151    
# Line 3144  for (;;) Line 3173  for (;;)
3173        switch(ctype)        switch(ctype)
3174          {          {
3175          case OP_ANY:          case OP_ANY:
3176          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3177            {            {
3178            for (i = 1; i <= min; i++)            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3179              {            eptr++;
             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
             eptr++;  
             }  
3180            }            }
3181          else eptr += min;          break;
3182    
3183            case OP_ALLANY:
3184            eptr += min;
3185          break;          break;
3186    
3187          case OP_ANYBYTE:          case OP_ANYBYTE:
# Line 3173  for (;;) Line 3202  for (;;)
3202              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3203              break;              break;
3204              case 0x000a:              case 0x000a:
3205                break;
3206    
3207              case 0x000b:              case 0x000b:
3208              case 0x000c:              case 0x000c:
3209              case 0x0085:              case 0x0085:
3210                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3211              break;              break;
3212              }              }
3213            }            }
# Line 3315  for (;;) Line 3347  for (;;)
3347              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3348              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3349              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3350              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3351              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3352                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3353                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3330  for (;;) Line 3362  for (;;)
3362              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3363              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3364              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3365              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3366              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3367                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3368              }              }
# Line 3343  for (;;) Line 3375  for (;;)
3375              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3376              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3377              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3378              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3379              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3380                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3381              }              }
# Line 3356  for (;;) Line 3388  for (;;)
3388              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3390              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3391              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3392              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3393                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3394              }              }
# Line 3378  for (;;) Line 3410  for (;;)
3410            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3411            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3412            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3413            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3414            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3415            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3416              {              {
# Line 3387  for (;;) Line 3419  for (;;)
3419                {                {
3420                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3421                }                }
3422              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3423              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3424              eptr += len;              eptr += len;
3425              }              }
# Line 3406  for (;;) Line 3438  for (;;)
3438            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3439            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3440            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3441                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
                 IS_NEWLINE(eptr)))  
3442              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3443    
3444            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3445            switch(ctype)            switch(ctype)
3446              {              {
3447              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3448              break;              case OP_ALLANY:
   
3449              case OP_ANYBYTE:              case OP_ANYBYTE:
3450              break;              break;
3451    
# Line 3427  for (;;) Line 3457  for (;;)
3457                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3458                break;                break;
3459                case 0x000a:                case 0x000a:
3460                  break;
3461    
3462                case 0x000b:                case 0x000b:
3463                case 0x000c:                case 0x000c:
3464                case 0x0085:                case 0x0085:
3465                case 0x2028:                case 0x2028:
3466                case 0x2029:                case 0x2029:
3467                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3468                break;                break;
3469                }                }
3470              break;              break;
# Line 3564  for (;;) Line 3597  for (;;)
3597            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3598            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3599            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3600                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3601              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3602    
3603            c = *eptr++;            c = *eptr++;
3604            switch(ctype)            switch(ctype)
3605              {              {
3606              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
3607              break;              case OP_ALLANY:
   
3608              case OP_ANYBYTE:              case OP_ANYBYTE:
3609              break;              break;
3610    
# Line 3583  for (;;) Line 3615  for (;;)
3615                case 0x000d:                case 0x000d:
3616                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3617                break;                break;
3618    
3619                case 0x000a:                case 0x000a:
3620                  break;
3621    
3622                case 0x000b:                case 0x000b:
3623                case 0x000c:                case 0x000c:
3624                case 0x0085:                case 0x0085:
3625                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3626                break;                break;
3627                }                }
3628              break;              break;
# Line 3701  for (;;) Line 3737  for (;;)
3737              int len = 1;              int len = 1;
3738              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3739              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3740              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3741              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3742                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3743                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3716  for (;;) Line 3752  for (;;)
3752              int len = 1;              int len = 1;
3753              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3754              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3755              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3756              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3757                break;                break;
3758              eptr+= len;              eptr+= len;
# Line 3729  for (;;) Line 3765  for (;;)
3765              int len = 1;              int len = 1;
3766              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3767              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3768              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3769              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3770                break;                break;
3771              eptr+= len;              eptr+= len;
# Line 3742  for (;;) Line 3778  for (;;)
3778              int len = 1;              int len = 1;
3779              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3780              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3781              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3782              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3783                break;                break;
3784              eptr+= len;              eptr+= len;
# Line 3771  for (;;) Line 3807  for (;;)
3807            {            {
3808            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3809            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3810            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3811            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3812            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3813              {              {
# Line 3780  for (;;) Line 3816  for (;;)
3816                {                {
3817                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3818                }                }
3819              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3820              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3821              eptr += len;              eptr += len;
3822              }              }
# Line 3802  for (;;) Line 3838  for (;;)
3838                BACKCHAR(eptr);                BACKCHAR(eptr);
3839                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3840                }                }
3841              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3842              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3843              eptr--;              eptr--;
3844              }              }
# Line 3822  for (;;) Line 3858  for (;;)
3858            case OP_ANY:            case OP_ANY:
3859            if (max < INT_MAX)            if (max < INT_MAX)
3860              {              {
3861              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
3862                {                {
3863                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3864                  {                eptr++;
3865                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3866                }                }
3867              }              }
3868    
# Line 3846  for (;;) Line 3870  for (;;)
3870    
3871            else            else
3872              {              {
3873              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
3874                {                {
3875                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3876                  {                eptr++;
3877                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3878                }                }
3879              else              }
3880              break;
3881    
3882              case OP_ALLANY:
3883              if (max < INT_MAX)
3884                {
3885                for (i = min; i < max; i++)
3886                {                {
3887                eptr = md->end_subject;                if (eptr >= md->end_subject) break;
3888                  eptr++;
3889                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3890                }                }
3891              }              }
3892              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3893            break;            break;
3894    
3895            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 3884  for (;;) Line 3914  for (;;)
3914                }                }
3915              else              else
3916                {                {
3917                if (c != 0x000a && c != 0x000b && c != 0x000c &&                if (c != 0x000a &&
3918                    c != 0x0085 && c != 0x2028 && c != 0x2029)                    (md->bsr_anycrlf ||
3919                       (c != 0x000b && c != 0x000c &&
3920                        c != 0x0085 && c != 0x2028 && c != 0x2029)))
3921                  break;                  break;
3922                eptr += len;                eptr += len;
3923                }                }
# Line 4045  for (;;) Line 4077  for (;;)
4077          switch(ctype)          switch(ctype)
4078            {            {
4079            case OP_ANY:            case OP_ANY:
4080            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4081              {              {
4082              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4083                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4084              }              }
4085            /* For DOTALL case, fall through and treat as \C */            break;
4086    
4087              case OP_ALLANY:
4088            case OP_ANYBYTE:            case OP_ANYBYTE:
4089            c = max - min;            c = max - min;
4090            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4075  for (;;) Line 4104  for (;;)
4104                }                }
4105              else              else
4106                {                {
4107                if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)                if (c != 0x000a &&
4108                      (md->bsr_anycrlf ||
4109                        (c != 0x000b && c != 0x000c && c != 0x0085)))
4110                  break;                  break;
4111                eptr++;                eptr++;
4112                }                }
# Line 4225  HEAP_RETURN: Line 4256  HEAP_RETURN:
4256  switch (frame->Xwhere)  switch (frame->Xwhere)
4257    {    {
4258    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4259    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4260    LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4261    LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4262    LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)    LBL(53) LBL(54)
4263    LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)  #ifdef SUPPORT_UTF8
4264      LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4265      LBL(32) LBL(34) LBL(42) LBL(46)
4266    #ifdef SUPPORT_UCP
4267      LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4268    #endif  /* SUPPORT_UCP */
4269    #endif  /* SUPPORT_UTF8 */
4270    default:    default:
4271    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4272    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 4408  if (re->magic_number != MAGIC_NUMBER) Line 4445  if (re->magic_number != MAGIC_NUMBER)
4445  /* Set up other data */  /* Set up other data */
4446    
4447  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4448  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
4449  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
4450    
4451  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
# Line 4423  end_subject = md->end_subject; Line 4460  end_subject = md->end_subject;
4460    
4461  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4462  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4463    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4464    
4465  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4466  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 4435  md->recursive = NULL; Line 4473  md->recursive = NULL;
4473  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4474  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
4475    
4476    /* Handle different \R options. */
4477    
4478    switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
4479      {
4480      case 0:
4481      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
4482        md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
4483      else
4484    #ifdef BSR_ANYCRLF
4485      md->bsr_anycrlf = TRUE;
4486    #else
4487      md->bsr_anycrlf = FALSE;
4488    #endif
4489      break;
4490    
4491      case PCRE_BSR_ANYCRLF:
4492      md->bsr_anycrlf = TRUE;
4493      break;
4494    
4495      case PCRE_BSR_UNICODE:
4496      md->bsr_anycrlf = FALSE;
4497      break;
4498    
4499      default: return PCRE_ERROR_BADNEWLINE;
4500      }
4501    
4502  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
4503  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
4504    
4505  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
4506         PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4507    {    {
4508    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
4509    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
# Line 4478  else Line 4542  else
4542  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
4543  moment. */  moment. */
4544    
4545  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
4546    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
4547    
4548  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 4555  studied, there may be a bitmap of possib Line 4619  studied, there may be a bitmap of possib
4619    
4620  if (!anchored)  if (!anchored)
4621    {    {
4622    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
4623      {      {
4624      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
4625      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 4570  if (!anchored) Line 4634  if (!anchored)
4634  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
4635  character" set. */  character" set. */
4636    
4637  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
4638    {    {
4639    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
4640    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 4586  the loop runs just once. */ Line 4650  the loop runs just once. */
4650  for(;;)  for(;;)
4651    {    {
4652    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4653    USPTR new_start_match;    USPTR new_start_match;
4654    
4655    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4656    
# Line 4617  for(;;) Line 4681  for(;;)
4681      if (first_byte_caseless)      if (first_byte_caseless)
4682        while (start_match < end_subject &&        while (start_match < end_subject &&
4683               md->lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4684          start_match++;          { NEXTCHAR(start_match); }
4685      else      else
4686        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4687          start_match++;          { NEXTCHAR(start_match); }
4688      }      }
4689    
4690    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
# Line 4630  for(;;) Line 4694  for(;;)
4694      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4695        {        {
4696        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4697          start_match++;          { NEXTCHAR(start_match); }
4698    
4699        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4700        and we are now at a LF, advance the match position by one more character.        and we are now at a LF, advance the match position by one more character.
# Line 4651  for(;;) Line 4715  for(;;)
4715      while (start_match < end_subject)      while (start_match < end_subject)
4716        {        {
4717        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4718        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;        if ((start_bits[c/8] & (1 << (c&7))) == 0)
4719            { NEXTCHAR(start_match); }
4720          else break;
4721        }        }
4722      }      }
4723    
# Line 4730  for(;;) Line 4796  for(;;)
4796    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
4797    md->match_call_count = 0;    md->match_call_count = 0;
4798    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4799    
4800    switch(rc)    switch(rc)
4801      {      {
4802      /* NOMATCH and PRUNE advance by one character. THEN at this level acts      /* NOMATCH and PRUNE advance by one character. THEN at this level acts
4803      exactly like PRUNE. */      exactly like PRUNE. */
4804    
4805      case MATCH_NOMATCH:      case MATCH_NOMATCH:
4806      case MATCH_PRUNE:      case MATCH_PRUNE:
4807      case MATCH_THEN:      case MATCH_THEN:
4808      new_start_match = start_match + 1;      new_start_match = start_match + 1;
4809  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4810      if (utf8)      if (utf8)
4811        while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)        while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4812          new_start_match++;          new_start_match++;
4813  #endif  #endif
4814      break;      break;
4815    
4816      /* SKIP passes back the next starting point explicitly. */      /* SKIP passes back the next starting point explicitly. */
4817    
4818      case MATCH_SKIP:      case MATCH_SKIP:
4819      new_start_match = md->start_match_ptr;      new_start_match = md->start_match_ptr;
4820      break;      break;
4821    
4822      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4823    
4824      case MATCH_COMMIT:      case MATCH_COMMIT:
4825      rc = MATCH_NOMATCH;      rc = MATCH_NOMATCH;
4826      goto ENDLOOP;      goto ENDLOOP;
4827    
4828      /* Any other return is some kind of error. */      /* Any other return is some kind of error. */
4829    
4830      default:      default:
4831      goto ENDLOOP;      goto ENDLOOP;
4832      }      }
4833    
4834    /* Control reaches here for the various types of "no match at this point"    /* Control reaches here for the various types of "no match at this point"
4835    result. Reset the code to MATCH_NOMATCH for subsequent checking. */    result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4836    
4837    rc = MATCH_NOMATCH;    rc = MATCH_NOMATCH;
4838    
4839    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4840    newline in the subject (though it may continue over the newline). Therefore,    newline in the subject (though it may continue over the newline). Therefore,
# Line 4785  for(;;) Line 4851  for(;;)
4851    
4852    if (anchored || start_match > end_subject) break;    if (anchored || start_match > end_subject) break;
4853    
4854    /* If we have just passed a CR and the newline option is CRLF or ANY or    /* If we have just passed a CR and we are now at a LF, and the pattern does
4855    ANYCRLF, and we are now at a LF, advance the match position by one more    not contain any explicit matches for \r or \n, and the newline option is CRLF
4856    character. */    or ANY or ANYCRLF, advance the match position by one more character. */
4857    
4858    if (start_match[-1] == '\r' &&    if (start_match[-1] == '\r' &&
4859         (md->nltype == NLTYPE_ANY ||        start_match < end_subject &&
4860          md->nltype == NLTYPE_ANYCRLF ||        *start_match == '\n' &&
4861          md->nllen == 2) &&        (re->flags & PCRE_HASCRORLF) == 0 &&
4862         start_match < end_subject &&          (md->nltype == NLTYPE_ANY ||
4863         *start_match == '\n')           md->nltype == NLTYPE_ANYCRLF ||
4864             md->nllen == 2))
4865      start_match++;      start_match++;
4866    
4867    }   /* End of for(;;) "bumpalong" loop */    }   /* End of for(;;) "bumpalong" loop */

Legend:
Removed from v.210  
changed lines
  Added in v.349

  ViewVC Help
Powered by ViewVC 1.1.5