/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

Left column: revision 335 by ph10, Sat Apr 12 14:36:14 2008 UTC | Right column: revision 381 by ph10, Tue Mar 3 16:08:23 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 158  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 761  for (;;) Line 787  for (;;)
787    
788      case OP_COND:      case OP_COND:
789      case OP_SCOND:      case OP_SCOND:
790        /* Because of the way auto-callout works during compile, a callout item is
791        inserted between OP_COND and an assertion condition. */
792    
793        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
794          {
795          if (pcre_callout != NULL)
796            {
797            pcre_callout_block cb;
798            cb.version          = 1;   /* Version 1 of the callout block */
799            cb.callout_number   = ecode[LINK_SIZE+2];
800            cb.offset_vector    = md->offset_vector;
801            cb.subject          = (PCRE_SPTR)md->start_subject;
802            cb.subject_length   = md->end_subject - md->start_subject;
803            cb.start_match      = mstart - md->start_subject;
804            cb.current_position = eptr - md->start_subject;
805            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
806            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
807            cb.capture_top      = offset_top/2;
808            cb.capture_last     = md->capture_last;
809            cb.callout_data     = md->callout_data;
810            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
811            if (rrc < 0) RRETURN(rrc);
812            }
813          ecode += _pcre_OP_lengths[OP_CALLOUT];
814          }
815    
816        /* Now see what the actual condition is */
817    
818      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
819        {        {
820        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
# Line 1429  for (;;) Line 1483  for (;;)
1483      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1484    
1485      case OP_ANY:      case OP_ANY:
1486      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1487        {      /* Fall through */
1488        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1489        }      case OP_ALLANY:
1490      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1491      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1492      ecode++;      ecode++;
1493      break;      break;
1494    
# Line 1654  for (;;) Line 1707  for (;;)
1707      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1708      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1709        {        {
1710        int chartype, script;        const ucd_record * prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1711    
1712        switch(ecode[1])        switch(ecode[1])
1713          {          {
# Line 1664  for (;;) Line 1716  for (;;)
1716          break;          break;
1717    
1718          case PT_LAMP:          case PT_LAMP:
1719          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1720               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1721               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1722            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1723           break;           break;
1724    
1725          case PT_GC:          case PT_GC:
1726          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1727            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1728          break;          break;
1729    
1730          case PT_PC:          case PT_PC:
1731          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1732            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1733          break;          break;
1734    
1735          case PT_SC:          case PT_SC:
1736          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1737            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1738          break;          break;
1739    
# Line 1700  for (;;) Line 1752  for (;;)
1752      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1753      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1754        {        {
1755        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1756        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1757        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1758          {          {
# Line 1710  for (;;) Line 1761  for (;;)
1761            {            {
1762            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1763            }            }
1764          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1765          if (category != ucp_M) break;          if (category != ucp_M) break;
1766          eptr += len;          eptr += len;
1767          }          }
# Line 1731  for (;;) Line 1782  for (;;)
1782      case OP_REF:      case OP_REF:
1783        {        {
1784        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1785        ecode += 3;                                 /* Advance past item */        ecode += 3;
1786    
1787          /* If the reference is unset, there are two possibilities:
1788    
1789          (a) In the default, Perl-compatible state, set the length to be longer
1790          than the amount of subject left; this ensures that every attempt at a
1791          match fails. We can't just fail here, because of the possibility of
1792          quantifiers with zero minima.
1793    
1794        /* If the reference is unset, set the length to be longer than the amount        (b) If the JavaScript compatibility flag is set, set the length to zero
1795        of subject left; this ensures that every attempt at a match fails. We        so that the back reference matches an empty string.
1796        can't just fail here, because of the possibility of quantifiers with zero  
1797        minima. */        Otherwise, set the length to the length of what was matched by the
1798          referenced subpattern. */
1799        length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
1800          md->end_subject - eptr + 1 :        if (offset >= offset_top || md->offset_vector[offset] < 0)
1801          md->offset_vector[offset+1] - md->offset_vector[offset];          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1802          else
1803            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1804    
1805        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1806    
# Line 2166  for (;;) Line 2226  for (;;)
2226          if (fc != dc)          if (fc != dc)
2227            {            {
2228  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2229            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2230  #endif  #endif
2231              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2232            }            }
# Line 2257  for (;;) Line 2317  for (;;)
2317  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2318          unsigned int othercase;          unsigned int othercase;
2319          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2320              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2321            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2322          else oclength = 0;          else oclength = 0;
2323  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2577  for (;;) Line 2637  for (;;)
2637              {              {
2638              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2639              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2640                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2641              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2642              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2643              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
2644                RRETURN(MATCH_NOMATCH);  
2645              }              }
2646            }            }
2647          else          else
# Line 2686  for (;;) Line 2747  for (;;)
2747              {              {
2748              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2749              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2750                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2751              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2752              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2753              }              }
2754            }            }
2755          else          else
# Line 2862  for (;;) Line 2923  for (;;)
2923              {              {
2924              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2925              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2926              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2927              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2928                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
2929                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2875  for (;;) Line 2936  for (;;)
2936              {              {
2937              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2938              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2939              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2940              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2941                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2942              }              }
# Line 2886  for (;;) Line 2947  for (;;)
2947              {              {
2948              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2949              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2950              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2951              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2952                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2953              }              }
# Line 2897  for (;;) Line 2958  for (;;)
2958              {              {
2959              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2960              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2961              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
2962              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2963                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2964              }              }
# Line 2916  for (;;) Line 2977  for (;;)
2977          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2978            {            {
2979            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2980            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
2981            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2982            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2983              {              {
# Line 2925  for (;;) Line 2986  for (;;)
2986                {                {
2987                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2988                }                }
2989              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2990              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2991              eptr += len;              eptr += len;
2992              }              }
# Line 2943  for (;;) Line 3004  for (;;)
3004          case OP_ANY:          case OP_ANY:
3005          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3006            {            {
3007            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))  
3008              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3009            eptr++;            eptr++;
3010            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3011            }            }
3012          break;          break;
3013    
3014            case OP_ALLANY:
3015            for (i = 1; i <= min; i++)
3016              {
3017              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3018              eptr++;
3019              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3020              }
3021            break;
3022    
3023          case OP_ANYBYTE:          case OP_ANYBYTE:
3024          eptr += min;          eptr += min;
3025          break;          break;
# Line 3159  for (;;) Line 3228  for (;;)
3228        switch(ctype)        switch(ctype)
3229          {          {
3230          case OP_ANY:          case OP_ANY:
3231          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3232            {            {
3233            for (i = 1; i <= min; i++)            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3234              {            eptr++;
             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
             eptr++;  
             }  
3235            }            }
3236          else eptr += min;          break;
3237    
3238            case OP_ALLANY:
3239            eptr += min;
3240          break;          break;
3241    
3242          case OP_ANYBYTE:          case OP_ANYBYTE:
# Line 3333  for (;;) Line 3402  for (;;)
3402              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3403              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3404              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3405              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3406              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3407                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3408                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3348  for (;;) Line 3417  for (;;)
3417              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3418              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3419              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3420              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3421              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3422                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3423              }              }
# Line 3361  for (;;) Line 3430  for (;;)
3430              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3431              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3432              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3433              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3434              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3435                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3436              }              }
# Line 3374  for (;;) Line 3443  for (;;)
3443              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3444              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3445              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3446              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3447              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3448                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3449              }              }
# Line 3396  for (;;) Line 3465  for (;;)
3465            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3466            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3467            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3468            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3469            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3470            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3471              {              {
# Line 3405  for (;;) Line 3474  for (;;)
3474                {                {
3475                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3476                }                }
3477              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3478              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3479              eptr += len;              eptr += len;
3480              }              }
# Line 3424  for (;;) Line 3493  for (;;)
3493            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3494            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3495            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3496                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
                 IS_NEWLINE(eptr)))  
3497              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3498    
3499            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3500            switch(ctype)            switch(ctype)
3501              {              {
3502              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3503              break;              case OP_ALLANY:
   
3504              case OP_ANYBYTE:              case OP_ANYBYTE:
3505              break;              break;
3506    
# Line 3585  for (;;) Line 3652  for (;;)
3652            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3653            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3654            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3655                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3656              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3657    
3658            c = *eptr++;            c = *eptr++;
3659            switch(ctype)            switch(ctype)
3660              {              {
3661              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
3662              break;              case OP_ALLANY:
   
3663              case OP_ANYBYTE:              case OP_ANYBYTE:
3664              break;              break;
3665    
# Line 3726  for (;;) Line 3792  for (;;)
3792              int len = 1;              int len = 1;
3793              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3794              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3795              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3796              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3797                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3798                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3741  for (;;) Line 3807  for (;;)
3807              int len = 1;              int len = 1;
3808              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3809              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3810              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3811              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3812                break;                break;
3813              eptr+= len;              eptr+= len;
# Line 3754  for (;;) Line 3820  for (;;)
3820              int len = 1;              int len = 1;
3821              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3822              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3823              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3824              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3825                break;                break;
3826              eptr+= len;              eptr+= len;
# Line 3767  for (;;) Line 3833  for (;;)
3833              int len = 1;              int len = 1;
3834              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3835              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3836              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3837              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3838                break;                break;
3839              eptr+= len;              eptr+= len;
# Line 3796  for (;;) Line 3862  for (;;)
3862            {            {
3863            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3864            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3865            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3866            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3867            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3868              {              {
# Line 3805  for (;;) Line 3871  for (;;)
3871                {                {
3872                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3873                }                }
3874              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3875              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3876              eptr += len;              eptr += len;
3877              }              }
# Line 3827  for (;;) Line 3893  for (;;)
3893                BACKCHAR(eptr);                BACKCHAR(eptr);
3894                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3895                }                }
3896              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3897              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3898              eptr--;              eptr--;
3899              }              }
# Line 3847  for (;;) Line 3913  for (;;)
3913            case OP_ANY:            case OP_ANY:
3914            if (max < INT_MAX)            if (max < INT_MAX)
3915              {              {
3916              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
3917                {                {
3918                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3919                  {                eptr++;
3920                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3921                }                }
3922              }              }
3923    
# Line 3871  for (;;) Line 3925  for (;;)
3925    
3926            else            else
3927              {              {
3928              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
3929                {                {
3930                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3931                  {                eptr++;
3932                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3933                }                }
3934              else              }
3935              break;
3936    
3937              case OP_ALLANY:
3938              if (max < INT_MAX)
3939                {
3940                for (i = min; i < max; i++)
3941                {                {
3942                eptr = md->end_subject;                if (eptr >= md->end_subject) break;
3943                  eptr++;
3944                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3945                }                }
3946              }              }
3947              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3948            break;            break;
3949    
3950            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 4072  for (;;) Line 4132  for (;;)
4132          switch(ctype)          switch(ctype)
4133            {            {
4134            case OP_ANY:            case OP_ANY:
4135            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4136              {              {
4137              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4138                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4139              }              }
4140            /* For DOTALL case, fall through and treat as \C */            break;
4141    
4142              case OP_ALLANY:
4143            case OP_ANYBYTE:            case OP_ANYBYTE:
4144            c = max - min;            c = max - min;
4145            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4356  Returns:          > 0 => success; value Line 4413  Returns:          > 0 => success; value
4413                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4414  */  */
4415    
4416  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4417  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4418    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4419    int offsetcount)    int offsetcount)
# Line 4458  end_subject = md->end_subject; Line 4515  end_subject = md->end_subject;
4515    
4516  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4517  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4518    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4519    
4520  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4521  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 4667  for(;;) Line 4725  for(;;)
4725    if (firstline)    if (firstline)
4726      {      {
4727      USPTR t = start_match;      USPTR t = start_match;
4728    #ifdef SUPPORT_UTF8
4729        if (utf8)
4730          {
4731          while (t < md->end_subject && !IS_NEWLINE(t))
4732            {
4733            t++;
4734            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
4735            }
4736          }
4737        else
4738    #endif
4739      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4740      end_subject = t;      end_subject = t;
4741      }      }
4742    
4743    /* Now test for a unique first byte */    /* Now advance to a unique first byte if there is one. */
4744    
4745    if (first_byte >= 0)    if (first_byte >= 0)
4746      {      {
4747      if (first_byte_caseless)      if (first_byte_caseless)
4748        while (start_match < end_subject &&        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
4749               md->lcc[*start_match] != first_byte)          start_match++;
         { NEXTCHAR(start_match); }  
4750      else      else
4751        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4752          { NEXTCHAR(start_match); }          start_match++;
4753      }      }
4754    
4755    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match */
4756    
4757    else if (startline)    else if (startline)
4758      {      {
4759      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4760        {        {
4761        while (start_match <= end_subject && !WAS_NEWLINE(start_match))  #ifdef SUPPORT_UTF8
4762          { NEXTCHAR(start_match); }        if (utf8)
4763            {
4764            while (start_match < end_subject && !WAS_NEWLINE(start_match))
4765              {
4766              start_match++;
4767              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4768                start_match++;
4769              }
4770            }
4771          else
4772    #endif
4773          while (start_match < end_subject && !WAS_NEWLINE(start_match))
4774            start_match++;
4775    
4776        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4777        and we are now at a LF, advance the match position by one more character.        and we are now at a LF, advance the match position by one more character.
# Line 4705  for(;;) Line 4785  for(;;)
4785        }        }
4786      }      }
4787    
4788    /* Or to a non-unique first char after study */    /* Or to a non-unique first byte after study */
4789    
4790    else if (start_bits != NULL)    else if (start_bits != NULL)
4791      {      {
4792      while (start_match < end_subject)      while (start_match < end_subject)
4793        {        {
4794        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4795        if ((start_bits[c/8] & (1 << (c&7))) == 0)        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4796          { NEXTCHAR(start_match); }          else break;
       else break;  
4797        }        }
4798      }      }
4799    

Legend:
Removed from v.335  
changed lines
  Added in v.381

  ViewVC Help
Powered by ViewVC 1.1.5