/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 227 by ph10, Tue Aug 21 15:00:15 2007 UTC | revision 349 by ph10, Wed Jul 2 18:42:11 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 43  pattern matching using an NFA algorithm, Line 43  pattern matching using an NFA algorithm,
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
46  #include <config.h>  #include "config.h"
47  #endif  #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
# Line 1148  for (;;) Line 1148  for (;;)
1148      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1149      break;      break;
1150    
1151      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1152      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1153      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1154      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1155      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1156    
1157      case OP_BRAZERO:      case OP_BRAZERO:
1158        {        {
# Line 1174  for (;;) Line 1174  for (;;)
1174        }        }
1175      break;      break;
1176    
1177        case OP_SKIPZERO:
1178          {
1179          next = ecode+1;
1180          do next += GET(next,1); while (*next == OP_ALT);
1181          ecode = next + 1 + LINK_SIZE;
1182          }
1183        break;
1184    
1185      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1186    
1187      case OP_KET:      case OP_KET:
# Line 1421  for (;;) Line 1429  for (;;)
1429      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1430    
1431      case OP_ANY:      case OP_ANY:
1432      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1433        {      /* Fall through */
1434        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1435        }      case OP_ALLANY:
1436      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1437      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1438      ecode++;      ecode++;
1439      break;      break;
1440    
# Line 1526  for (;;) Line 1533  for (;;)
1533        case 0x000d:        case 0x000d:
1534        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1535        break;        break;
1536    
1537        case 0x000a:        case 0x000a:
1538          break;
1539    
1540        case 0x000b:        case 0x000b:
1541        case 0x000c:        case 0x000c:
1542        case 0x0085:        case 0x0085:
1543        case 0x2028:        case 0x2028:
1544        case 0x2029:        case 0x2029:
1545          if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1546        break;        break;
1547        }        }
1548      ecode++;      ecode++;
# Line 1642  for (;;) Line 1653  for (;;)
1653      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1654      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1655        {        {
1656        int chartype, script;        const ucd_record * prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1657    
1658        switch(ecode[1])        switch(ecode[1])
1659          {          {
# Line 1652  for (;;) Line 1662  for (;;)
1662          break;          break;
1663    
1664          case PT_LAMP:          case PT_LAMP:
1665          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1666               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1667               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1668            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1669           break;           break;
1670    
1671          case PT_GC:          case PT_GC:
1672          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != ucp_gentype[prop->chartype]) == (op == OP_PROP))
1673            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1674          break;          break;
1675    
1676          case PT_PC:          case PT_PC:
1677          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1678            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1679          break;          break;
1680    
1681          case PT_SC:          case PT_SC:
1682          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1683            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1684          break;          break;
1685    
# Line 1688  for (;;) Line 1698  for (;;)
1698      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1699      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1700        {        {
1701        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1702        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1703        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1704          {          {
# Line 1698  for (;;) Line 1707  for (;;)
1707            {            {
1708            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1709            }            }
1710          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1711          if (category != ucp_M) break;          if (category != ucp_M) break;
1712          eptr += len;          eptr += len;
1713          }          }
# Line 1719  for (;;) Line 1728  for (;;)
1728      case OP_REF:      case OP_REF:
1729        {        {
1730        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1731        ecode += 3;                                 /* Advance past item */        ecode += 3;
1732    
1733          /* If the reference is unset, there are two possibilities:
1734    
1735          (a) In the default, Perl-compatible state, set the length to be longer
1736          than the amount of subject left; this ensures that every attempt at a
1737          match fails. We can't just fail here, because of the possibility of
1738          quantifiers with zero minima.
1739    
1740        /* If the reference is unset, set the length to be longer than the amount        (b) If the JavaScript compatibility flag is set, set the length to zero
1741        of subject left; this ensures that every attempt at a match fails. We        so that the back reference matches an empty string.
1742        can't just fail here, because of the possibility of quantifiers with zero  
1743        minima. */        Otherwise, set the length to the length of what was matched by the
1744          referenced subpattern. */
1745        length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
1746          md->end_subject - eptr + 1 :        if (offset >= offset_top || md->offset_vector[offset] < 0)
1747          md->offset_vector[offset+1] - md->offset_vector[offset];          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1748          else
1749            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1750    
1751        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1752    
# Line 2154  for (;;) Line 2172  for (;;)
2172          if (fc != dc)          if (fc != dc)
2173            {            {
2174  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2175            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2176  #endif  #endif
2177              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2178            }            }
# Line 2245  for (;;) Line 2263  for (;;)
2263  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2264          unsigned int othercase;          unsigned int othercase;
2265          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2266              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2267            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2268          else oclength = 0;          else oclength = 0;
2269  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2850  for (;;) Line 2868  for (;;)
2868              {              {
2869              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2870              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2871              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2872              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2873                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
2874                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2863  for (;;) Line 2881  for (;;)
2881              {              {
2882              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2883              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2884              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2885              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2886                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2887              }              }
# Line 2874  for (;;) Line 2892  for (;;)
2892              {              {
2893              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2894              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2895              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2896              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2897                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2898              }              }
# Line 2885  for (;;) Line 2903  for (;;)
2903              {              {
2904              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2905              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2906              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
2907              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2908                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2909              }              }
# Line 2904  for (;;) Line 2922  for (;;)
2922          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2923            {            {
2924            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2925            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
2926            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2927            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2928              {              {
# Line 2913  for (;;) Line 2931  for (;;)
2931                {                {
2932                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2933                }                }
2934              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2935              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2936              eptr += len;              eptr += len;
2937              }              }
# Line 2931  for (;;) Line 2949  for (;;)
2949          case OP_ANY:          case OP_ANY:
2950          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2951            {            {
2952            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))  
2953              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2954            eptr++;            eptr++;
2955            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2956            }            }
2957          break;          break;
2958    
2959            case OP_ALLANY:
2960            for (i = 1; i <= min; i++)
2961              {
2962              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2963              eptr++;
2964              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2965              }
2966            break;
2967    
2968          case OP_ANYBYTE:          case OP_ANYBYTE:
2969          eptr += min;          eptr += min;
2970          break;          break;
# Line 2954  for (;;) Line 2980  for (;;)
2980              case 0x000d:              case 0x000d:
2981              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2982              break;              break;
2983    
2984              case 0x000a:              case 0x000a:
2985                break;
2986    
2987              case 0x000b:              case 0x000b:
2988              case 0x000c:              case 0x000c:
2989              case 0x0085:              case 0x0085:
2990              case 0x2028:              case 0x2028:
2991              case 0x2029:              case 0x2029:
2992                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2993              break;              break;
2994              }              }
2995            }            }
# Line 3143  for (;;) Line 3173  for (;;)
3173        switch(ctype)        switch(ctype)
3174          {          {
3175          case OP_ANY:          case OP_ANY:
3176          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3177            {            {
3178            for (i = 1; i <= min; i++)            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3179              {            eptr++;
             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
             eptr++;  
             }  
3180            }            }
3181          else eptr += min;          break;
3182    
3183            case OP_ALLANY:
3184            eptr += min;
3185          break;          break;
3186    
3187          case OP_ANYBYTE:          case OP_ANYBYTE:
# Line 3172  for (;;) Line 3202  for (;;)
3202              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3203              break;              break;
3204              case 0x000a:              case 0x000a:
3205                break;
3206    
3207              case 0x000b:              case 0x000b:
3208              case 0x000c:              case 0x000c:
3209              case 0x0085:              case 0x0085:
3210                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3211              break;              break;
3212              }              }
3213            }            }
# Line 3314  for (;;) Line 3347  for (;;)
3347              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3348              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3349              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3350              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3351              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3352                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3353                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3329  for (;;) Line 3362  for (;;)
3362              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3363              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3364              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3365              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3366              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3367                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3368              }              }
# Line 3342  for (;;) Line 3375  for (;;)
3375              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3376              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3377              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3378              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3379              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3380                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3381              }              }
# Line 3355  for (;;) Line 3388  for (;;)
3388              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3390              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3391              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3392              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3393                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3394              }              }
# Line 3377  for (;;) Line 3410  for (;;)
3410            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3411            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3412            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3413            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3414            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3415            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3416              {              {
# Line 3386  for (;;) Line 3419  for (;;)
3419                {                {
3420                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3421                }                }
3422              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3423              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3424              eptr += len;              eptr += len;
3425              }              }
# Line 3405  for (;;) Line 3438  for (;;)
3438            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3439            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3440            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3441                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
                 IS_NEWLINE(eptr)))  
3442              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3443    
3444            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3445            switch(ctype)            switch(ctype)
3446              {              {
3447              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3448              break;              case OP_ALLANY:
   
3449              case OP_ANYBYTE:              case OP_ANYBYTE:
3450              break;              break;
3451    
# Line 3426  for (;;) Line 3457  for (;;)
3457                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3458                break;                break;
3459                case 0x000a:                case 0x000a:
3460                  break;
3461    
3462                case 0x000b:                case 0x000b:
3463                case 0x000c:                case 0x000c:
3464                case 0x0085:                case 0x0085:
3465                case 0x2028:                case 0x2028:
3466                case 0x2029:                case 0x2029:
3467                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3468                break;                break;
3469                }                }
3470              break;              break;
# Line 3563  for (;;) Line 3597  for (;;)
3597            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3598            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3599            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3600                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3601              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3602    
3603            c = *eptr++;            c = *eptr++;
3604            switch(ctype)            switch(ctype)
3605              {              {
3606              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
3607              break;              case OP_ALLANY:
   
3608              case OP_ANYBYTE:              case OP_ANYBYTE:
3609              break;              break;
3610    
# Line 3582  for (;;) Line 3615  for (;;)
3615                case 0x000d:                case 0x000d:
3616                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3617                break;                break;
3618    
3619                case 0x000a:                case 0x000a:
3620                  break;
3621    
3622                case 0x000b:                case 0x000b:
3623                case 0x000c:                case 0x000c:
3624                case 0x0085:                case 0x0085:
3625                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3626                break;                break;
3627                }                }
3628              break;              break;
# Line 3700  for (;;) Line 3737  for (;;)
3737              int len = 1;              int len = 1;
3738              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3739              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3740              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3741              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3742                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3743                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3715  for (;;) Line 3752  for (;;)
3752              int len = 1;              int len = 1;
3753              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3754              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3755              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3756              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3757                break;                break;
3758              eptr+= len;              eptr+= len;
# Line 3728  for (;;) Line 3765  for (;;)
3765              int len = 1;              int len = 1;
3766              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3767              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3768              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3769              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3770                break;                break;
3771              eptr+= len;              eptr+= len;
# Line 3741  for (;;) Line 3778  for (;;)
3778              int len = 1;              int len = 1;
3779              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3780              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3781              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3782              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3783                break;                break;
3784              eptr+= len;              eptr+= len;
# Line 3770  for (;;) Line 3807  for (;;)
3807            {            {
3808            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3809            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3810            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3811            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3812            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3813              {              {
# Line 3779  for (;;) Line 3816  for (;;)
3816                {                {
3817                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3818                }                }
3819              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3820              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3821              eptr += len;              eptr += len;
3822              }              }
# Line 3801  for (;;) Line 3838  for (;;)
3838                BACKCHAR(eptr);                BACKCHAR(eptr);
3839                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3840                }                }
3841              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3842              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3843              eptr--;              eptr--;
3844              }              }
# Line 3821  for (;;) Line 3858  for (;;)
3858            case OP_ANY:            case OP_ANY:
3859            if (max < INT_MAX)            if (max < INT_MAX)
3860              {              {
3861              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
3862                {                {
3863                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3864                  {                eptr++;
3865                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3866                }                }
3867              }              }
3868    
# Line 3845  for (;;) Line 3870  for (;;)
3870    
3871            else            else
3872              {              {
3873              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
3874                {                {
3875                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3876                  {                eptr++;
3877                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3878                }                }
3879              else              }
3880              break;
3881    
3882              case OP_ALLANY:
3883              if (max < INT_MAX)
3884                {
3885                for (i = min; i < max; i++)
3886                {                {
3887                eptr = md->end_subject;                if (eptr >= md->end_subject) break;
3888                  eptr++;
3889                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3890                }                }
3891              }              }
3892              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3893            break;            break;
3894    
3895            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 3883  for (;;) Line 3914  for (;;)
3914                }                }
3915              else              else
3916                {                {
3917                if (c != 0x000a && c != 0x000b && c != 0x000c &&                if (c != 0x000a &&
3918                    c != 0x0085 && c != 0x2028 && c != 0x2029)                    (md->bsr_anycrlf ||
3919                       (c != 0x000b && c != 0x000c &&
3920                        c != 0x0085 && c != 0x2028 && c != 0x2029)))
3921                  break;                  break;
3922                eptr += len;                eptr += len;
3923                }                }
# Line 4044  for (;;) Line 4077  for (;;)
4077          switch(ctype)          switch(ctype)
4078            {            {
4079            case OP_ANY:            case OP_ANY:
4080            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4081              {              {
4082              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4083                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4084              }              }
4085            /* For DOTALL case, fall through and treat as \C */            break;
4086    
4087              case OP_ALLANY:
4088            case OP_ANYBYTE:            case OP_ANYBYTE:
4089            c = max - min;            c = max - min;
4090            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4074  for (;;) Line 4104  for (;;)
4104                }                }
4105              else              else
4106                {                {
4107                if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)                if (c != 0x000a &&
4108                      (md->bsr_anycrlf ||
4109                        (c != 0x000b && c != 0x000c && c != 0x0085)))
4110                  break;                  break;
4111                eptr++;                eptr++;
4112                }                }
# Line 4224  HEAP_RETURN: Line 4256  HEAP_RETURN:
4256  switch (frame->Xwhere)  switch (frame->Xwhere)
4257    {    {
4258    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4259    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4260    LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4261    LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4262    LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)    LBL(53) LBL(54)
4263    LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47) LBL(48)  #ifdef SUPPORT_UTF8
4264    LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4265      LBL(32) LBL(34) LBL(42) LBL(46)
4266    #ifdef SUPPORT_UCP
4267      LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4268    #endif  /* SUPPORT_UCP */
4269    #endif  /* SUPPORT_UTF8 */
4270    default:    default:
4271    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4272    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 4408  if (re->magic_number != MAGIC_NUMBER) Line 4445  if (re->magic_number != MAGIC_NUMBER)
4445  /* Set up other data */  /* Set up other data */
4446    
4447  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4448  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
4449  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
4450    
4451  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
# Line 4423  end_subject = md->end_subject; Line 4460  end_subject = md->end_subject;
4460    
4461  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4462  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4463    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4464    
4465  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4466  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 4435  md->recursive = NULL; Line 4473  md->recursive = NULL;
4473  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4474  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
4475    
4476    /* Handle different \R options. */
4477    
4478    switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
4479      {
4480      case 0:
4481      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
4482        md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
4483      else
4484    #ifdef BSR_ANYCRLF
4485      md->bsr_anycrlf = TRUE;
4486    #else
4487      md->bsr_anycrlf = FALSE;
4488    #endif
4489      break;
4490    
4491      case PCRE_BSR_ANYCRLF:
4492      md->bsr_anycrlf = TRUE;
4493      break;
4494    
4495      case PCRE_BSR_UNICODE:
4496      md->bsr_anycrlf = FALSE;
4497      break;
4498    
4499      default: return PCRE_ERROR_BADNEWLINE;
4500      }
4501    
4502  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
4503  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
4504    
4505  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
4506         PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4507    {    {
4508    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
4509    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
# Line 4478  else Line 4542  else
4542  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
4543  moment. */  moment. */
4544    
4545  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
4546    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
4547    
4548  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 4555  studied, there may be a bitmap of possib Line 4619  studied, there may be a bitmap of possib
4619    
4620  if (!anchored)  if (!anchored)
4621    {    {
4622    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
4623      {      {
4624      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
4625      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 4570  if (!anchored) Line 4634  if (!anchored)
4634  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
4635  character" set. */  character" set. */
4636    
4637  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
4638    {    {
4639    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
4640    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 4617  for(;;) Line 4681  for(;;)
4681      if (first_byte_caseless)      if (first_byte_caseless)
4682        while (start_match < end_subject &&        while (start_match < end_subject &&
4683               md->lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4684          start_match++;          { NEXTCHAR(start_match); }
4685      else      else
4686        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4687          start_match++;          { NEXTCHAR(start_match); }
4688      }      }
4689    
4690    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
# Line 4630  for(;;) Line 4694  for(;;)
4694      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4695        {        {
4696        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4697          start_match++;          { NEXTCHAR(start_match); }
4698    
4699        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4700        and we are now at a LF, advance the match position by one more character.        and we are now at a LF, advance the match position by one more character.
# Line 4651  for(;;) Line 4715  for(;;)
4715      while (start_match < end_subject)      while (start_match < end_subject)
4716        {        {
4717        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4718        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;        if ((start_bits[c/8] & (1 << (c&7))) == 0)
4719            { NEXTCHAR(start_match); }
4720          else break;
4721        }        }
4722      }      }
4723    
# Line 4792  for(;;) Line 4858  for(;;)
4858    if (start_match[-1] == '\r' &&    if (start_match[-1] == '\r' &&
4859        start_match < end_subject &&        start_match < end_subject &&
4860        *start_match == '\n' &&        *start_match == '\n' &&
4861        (re->options & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
4862          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
4863           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||
4864           md->nllen == 2))           md->nllen == 2))

Legend:
Removed from v.227  
changed lines
  Added in v.349

  ViewVC Help
Powered by ViewVC 1.1.5