/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 511 by ph10, Mon Mar 29 09:25:38 2010 UTC | revision 530 by ph10, Tue Jun 1 13:42:06 2010 UTC
# Line 255  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 255  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
255         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258         RM51,  RM52, RM53, RM54 };         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
262  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 314  argument of match(), which never changes Line 315  argument of match(), which never changes
315    
316  #define RRETURN(ra)\  #define RRETURN(ra)\
317    {\    {\
318    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
319    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
320    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
321    if (frame != NULL)\    if (frame != NULL)\
322      {\      {\
323      rrc = ra;\      rrc = ra;\
# Line 685  for (;;) Line 686  for (;;)
686      case OP_MARK:      case OP_MARK:
687      markptr = ecode + 2;      markptr = ecode + 2;
688      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
689        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM55);
690    
691      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
692      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
693      argument. It is passed back in md->start_match_ptr (an overloading of that      argument. It is passed back in md->start_match_ptr (an overloading of that
694      variable). If it does match, we reset that variable to the current subject      variable). If it does match, we reset that variable to the current subject
695      position and return MATCH_SKIP. Otherwise, pass back the return code      position and return MATCH_SKIP. Otherwise, pass back the return code
696      unaltered. */      unaltered. */
697    
698      if (rrc == MATCH_SKIP_ARG &&      if (rrc == MATCH_SKIP_ARG &&
699          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
700        {        {
701        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
702        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
703        }        }
704    
705      if (md->mark == NULL) md->mark = markptr;      if (md->mark == NULL) md->mark = markptr;
706      RRETURN(rrc);      RRETURN(rrc);
707    
708      case OP_FAIL:      case OP_FAIL:
# Line 721  for (;;) Line 722  for (;;)
722    
723      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
724      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
725        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM56);
726      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
727      md->mark = ecode + 2;      md->mark = ecode + 2;
728      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
# Line 735  for (;;) Line 736  for (;;)
736    
737      case OP_SKIP_ARG:      case OP_SKIP_ARG:
738      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
739        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM57);
740      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
741    
742      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
743      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
744      caught by a matching MARK, or get to the top, where it is treated the same      caught by a matching MARK, or get to the top, where it is treated the same
745      as PRUNE. */      as PRUNE. */
746    
747      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
748      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
749    
750      case OP_THEN:      case OP_THEN:
751      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
752        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM54);
# Line 754  for (;;) Line 755  for (;;)
755    
756      case OP_THEN_ARG:      case OP_THEN_ARG:
757      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
758        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM58);
759      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
760      md->mark = ecode + 2;      md->mark = ecode + 2;
761      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
# Line 793  for (;;) Line 794  for (;;)
794        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
795    
796        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
797        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
798            (int)(eptr - md->start_subject);
799    
800        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
801        do        do
# Line 852  for (;;) Line 854  for (;;)
854    
855          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
856            eptrb, flags, RM48);            eptrb, flags, RM48);
857          if (rrc == MATCH_NOMATCH) md->mark = markptr;          if (rrc == MATCH_NOMATCH) md->mark = markptr;
858          RRETURN(rrc);          RRETURN(rrc);
859          }          }
860    
861        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
# Line 888  for (;;) Line 890  for (;;)
890          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
891          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
892          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
893          cb.subject_length   = md->end_subject - md->start_subject;          cb.subject_length   = (int)(md->end_subject - md->start_subject);
894          cb.start_match      = mstart - md->start_subject;          cb.start_match      = (int)(mstart - md->start_subject);
895          cb.current_position = eptr - md->start_subject;          cb.current_position = (int)(eptr - md->start_subject);
896          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
897          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
898          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
# Line 1116  for (;;) Line 1118  for (;;)
1118        {        {
1119        md->offset_vector[offset] =        md->offset_vector[offset] =
1120          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
1121        md->offset_vector[offset+1] = eptr - md->start_subject;        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1122        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1123        }        }
1124      ecode += 3;      ecode += 3;
# Line 1158  for (;;) Line 1160  for (;;)
1160      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1161      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1162      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1163      MRRETURN(((op == OP_END)? MATCH_MATCH : MATCH_ACCEPT));  
1164        /* For some reason, the macros don't work properly if an expression is
1165        given as the argument to MRRETURN when the heap is in use. */
1166    
1167        rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1168        MRRETURN(rrc);
1169    
1170      /* Change option settings */      /* Change option settings */
1171    
# Line 1274  for (;;) Line 1281  for (;;)
1281        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1282        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1283        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1284        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1285        cb.start_match      = mstart - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1286        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1287        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1288        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1289        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
# Line 1553  for (;;) Line 1560  for (;;)
1560          {          {
1561          md->offset_vector[offset] =          md->offset_vector[offset] =
1562            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1563          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1564          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1565          }          }
1566    
# Line 1713  for (;;) Line 1720  for (;;)
1720  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1721        if (utf8)        if (utf8)
1722          {          {
1723            /* Get status of previous character */
1724    
1725          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1726            {            {
1727            USPTR lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1728            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1729            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1730            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1731    #ifdef SUPPORT_UCP
1732              if (md->use_ucp)
1733                {
1734                if (c == '_') prev_is_word = TRUE; else
1735                  {
1736                  int cat = UCD_CATEGORY(c);
1737                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1738                  }
1739                }
1740              else
1741    #endif
1742            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1743            }            }
1744    
1745            /* Get status of next character */
1746    
1747          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1748            {            {
1749            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 1729  for (;;) Line 1752  for (;;)
1752          else          else
1753            {            {
1754            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1755    #ifdef SUPPORT_UCP
1756              if (md->use_ucp)
1757                {
1758                if (c == '_') cur_is_word = TRUE; else
1759                  {
1760                  int cat = UCD_CATEGORY(c);
1761                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1762                  }
1763                }
1764              else
1765    #endif
1766            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1767            }            }
1768          }          }
1769        else        else
1770  #endif  #endif
1771    
1772        /* Not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1773          consistency with the behaviour of \w we do use it in this case. */
1774    
1775          {          {
1776            /* Get status of previous character */
1777    
1778          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1779            {            {
1780            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1781    #ifdef SUPPORT_UCP
1782              if (md->use_ucp)
1783                {
1784                c = eptr[-1];
1785                if (c == '_') prev_is_word = TRUE; else
1786                  {
1787                  int cat = UCD_CATEGORY(c);
1788                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1789                  }
1790                }
1791              else
1792    #endif
1793            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1794            }            }
1795    
1796            /* Get status of next character */
1797    
1798          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1799            {            {
1800            SCHECK_PARTIAL();            SCHECK_PARTIAL();
1801            cur_is_word = FALSE;            cur_is_word = FALSE;
1802            }            }
1803          else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          else
1804    #ifdef SUPPORT_UCP
1805            if (md->use_ucp)
1806              {
1807              c = *eptr;
1808              if (c == '_') cur_is_word = TRUE; else
1809                {
1810                int cat = UCD_CATEGORY(c);
1811                cur_is_word = (cat == ucp_L || cat == ucp_N);
1812                }
1813              }
1814            else
1815    #endif
1816            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1817          }          }
1818    
1819        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 2055  for (;;) Line 2120  for (;;)
2120               prop->chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2121               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2122            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2123           break;          break;
2124    
2125          case PT_GC:          case PT_GC:
2126          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
# Line 2072  for (;;) Line 2137  for (;;)
2137            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2138          break;          break;
2139    
2140            /* These are specials */
2141    
2142            case PT_ALNUM:
2143            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2144                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2145              MRRETURN(MATCH_NOMATCH);
2146            break;
2147    
2148            case PT_SPACE:    /* Perl space */
2149            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2150                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2151                   == (op == OP_NOTPROP))
2152              MRRETURN(MATCH_NOMATCH);
2153            break;
2154    
2155            case PT_PXSPACE:  /* POSIX space */
2156            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2157                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2158                 c == CHAR_FF || c == CHAR_CR)
2159                   == (op == OP_NOTPROP))
2160              MRRETURN(MATCH_NOMATCH);
2161            break;
2162    
2163            case PT_WORD:
2164            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2165                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2166                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2167              MRRETURN(MATCH_NOMATCH);
2168            break;
2169    
2170            /* This should never occur */
2171    
2172          default:          default:
2173          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
2174          }          }
# Line 2137  for (;;) Line 2234  for (;;)
2234        referenced subpattern. */        referenced subpattern. */
2235    
2236        if (offset >= offset_top || md->offset_vector[offset] < 0)        if (offset >= offset_top || md->offset_vector[offset] < 0)
2237          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;          length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2238        else        else
2239          length = md->offset_vector[offset+1] - md->offset_vector[offset];          length = md->offset_vector[offset+1] - md->offset_vector[offset];
2240    
# Line 3488  for (;;) Line 3585  for (;;)
3585              }              }
3586            break;            break;
3587    
3588              case PT_ALNUM:
3589              for (i = 1; i <= min; i++)
3590                {
3591                if (eptr >= md->end_subject)
3592                  {
3593                  SCHECK_PARTIAL();
3594                  MRRETURN(MATCH_NOMATCH);
3595                  }
3596                GETCHARINCTEST(c, eptr);
3597                prop_category = UCD_CATEGORY(c);
3598                if ((prop_category == ucp_L || prop_category == ucp_N)
3599                       == prop_fail_result)
3600                  MRRETURN(MATCH_NOMATCH);
3601                }
3602              break;
3603    
3604              case PT_SPACE:    /* Perl space */
3605              for (i = 1; i <= min; i++)
3606                {
3607                if (eptr >= md->end_subject)
3608                  {
3609                  SCHECK_PARTIAL();
3610                  MRRETURN(MATCH_NOMATCH);
3611                  }
3612                GETCHARINCTEST(c, eptr);
3613                prop_category = UCD_CATEGORY(c);
3614                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3615                     c == CHAR_FF || c == CHAR_CR)
3616                       == prop_fail_result)
3617                  MRRETURN(MATCH_NOMATCH);
3618                }
3619              break;
3620    
3621              case PT_PXSPACE:  /* POSIX space */
3622              for (i = 1; i <= min; i++)
3623                {
3624                if (eptr >= md->end_subject)
3625                  {
3626                  SCHECK_PARTIAL();
3627                  MRRETURN(MATCH_NOMATCH);
3628                  }
3629                GETCHARINCTEST(c, eptr);
3630                prop_category = UCD_CATEGORY(c);
3631                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3632                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3633                       == prop_fail_result)
3634                  MRRETURN(MATCH_NOMATCH);
3635                }
3636              break;
3637    
3638              case PT_WORD:
3639              for (i = 1; i <= min; i++)
3640                {
3641                if (eptr >= md->end_subject)
3642                  {
3643                  SCHECK_PARTIAL();
3644                  MRRETURN(MATCH_NOMATCH);
3645                  }
3646                GETCHARINCTEST(c, eptr);
3647                prop_category = UCD_CATEGORY(c);
3648                if ((prop_category == ucp_L || prop_category == ucp_N ||
3649                     c == CHAR_UNDERSCORE)
3650                       == prop_fail_result)
3651                  MRRETURN(MATCH_NOMATCH);
3652                }
3653              break;
3654    
3655              /* This should not occur */
3656    
3657            default:            default:
3658            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
3659            }            }
# Line 4048  for (;;) Line 4214  for (;;)
4214                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4215                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4216                }                }
4217              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4218              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4219              }              }
4220            /* Control never gets here */            /* Control never gets here */
# Line 4064  for (;;) Line 4230  for (;;)
4230                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4231                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4232                }                }
4233              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4234              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4235              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4236                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4084  for (;;) Line 4250  for (;;)
4250                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4251                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4252                }                }
4253              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4254              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4255              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4256                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4102  for (;;) Line 4268  for (;;)
4268                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4269                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4270                }                }
4271              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4272              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4273              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4274                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4120  for (;;) Line 4286  for (;;)
4286                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4287                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4288                }                }
4289              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4290              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4291              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4292                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4293              }              }
4294            /* Control never gets here */            /* Control never gets here */
4295    
4296              case PT_ALNUM:
4297              for (fi = min;; fi++)
4298                {
4299                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4300                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4301                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4302                if (eptr >= md->end_subject)
4303                  {
4304                  SCHECK_PARTIAL();
4305                  MRRETURN(MATCH_NOMATCH);
4306                  }
4307                GETCHARINCTEST(c, eptr);
4308                prop_category = UCD_CATEGORY(c);
4309                if ((prop_category == ucp_L || prop_category == ucp_N)
4310                       == prop_fail_result)
4311                  MRRETURN(MATCH_NOMATCH);
4312                }
4313              /* Control never gets here */
4314    
4315              case PT_SPACE:    /* Perl space */
4316              for (fi = min;; fi++)
4317                {
4318                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4319                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4320                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4321                if (eptr >= md->end_subject)
4322                  {
4323                  SCHECK_PARTIAL();
4324                  MRRETURN(MATCH_NOMATCH);
4325                  }
4326                GETCHARINCTEST(c, eptr);
4327                prop_category = UCD_CATEGORY(c);
4328                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4329                     c == CHAR_FF || c == CHAR_CR)
4330                       == prop_fail_result)
4331                  MRRETURN(MATCH_NOMATCH);
4332                }
4333              /* Control never gets here */
4334    
4335              case PT_PXSPACE:  /* POSIX space */
4336              for (fi = min;; fi++)
4337                {
4338                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4339                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4340                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4341                if (eptr >= md->end_subject)
4342                  {
4343                  SCHECK_PARTIAL();
4344                  MRRETURN(MATCH_NOMATCH);
4345                  }
4346                GETCHARINCTEST(c, eptr);
4347                prop_category = UCD_CATEGORY(c);
4348                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4349                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4350                       == prop_fail_result)
4351                  MRRETURN(MATCH_NOMATCH);
4352                }
4353              /* Control never gets here */
4354    
4355              case PT_WORD:
4356              for (fi = min;; fi++)
4357                {
4358                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4359                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4360                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4361                if (eptr >= md->end_subject)
4362                  {
4363                  SCHECK_PARTIAL();
4364                  MRRETURN(MATCH_NOMATCH);
4365                  }
4366                GETCHARINCTEST(c, eptr);
4367                prop_category = UCD_CATEGORY(c);
4368                if ((prop_category == ucp_L ||
4369                     prop_category == ucp_N ||
4370                     c == CHAR_UNDERSCORE)
4371                       == prop_fail_result)
4372                  MRRETURN(MATCH_NOMATCH);
4373                }
4374              /* Control never gets here */
4375    
4376              /* This should never occur */
4377    
4378            default:            default:
4379            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
4380            }            }
# Line 4473  for (;;) Line 4721  for (;;)
4721                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4722                break;                break;
4723                }                }
4724              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4725              if (prop_fail_result) break;              if (prop_fail_result) break;
4726              eptr+= len;              eptr+= len;
4727              }              }
# Line 4488  for (;;) Line 4736  for (;;)
4736                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4737                break;                break;
4738                }                }
4739              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4740              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4741              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4742                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4507  for (;;) Line 4755  for (;;)
4755                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4756                break;                break;
4757                }                }
4758              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4759              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4760              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4761                break;                break;
# Line 4524  for (;;) Line 4772  for (;;)
4772                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4773                break;                break;
4774                }                }
4775              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4776              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4777              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4778                break;                break;
# Line 4541  for (;;) Line 4789  for (;;)
4789                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4790                break;                break;
4791                }                }
4792              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4793              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4794              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4795                break;                break;
4796              eptr+= len;              eptr+= len;
4797              }              }
4798            break;            break;
4799    
4800              case PT_ALNUM:
4801              for (i = min; i < max; i++)
4802                {
4803                int len = 1;
4804                if (eptr >= md->end_subject)
4805                  {
4806                  SCHECK_PARTIAL();
4807                  break;
4808                  }
4809                GETCHARLENTEST(c, eptr, len);
4810                prop_category = UCD_CATEGORY(c);
4811                if ((prop_category == ucp_L || prop_category == ucp_N)
4812                     == prop_fail_result)
4813                  break;
4814                eptr+= len;
4815                }
4816              break;
4817    
4818              case PT_SPACE:    /* Perl space */
4819              for (i = min; i < max; i++)
4820                {
4821                int len = 1;
4822                if (eptr >= md->end_subject)
4823                  {
4824                  SCHECK_PARTIAL();
4825                  break;
4826                  }
4827                GETCHARLENTEST(c, eptr, len);
4828                prop_category = UCD_CATEGORY(c);
4829                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4830                     c == CHAR_FF || c == CHAR_CR)
4831                     == prop_fail_result)
4832                  break;
4833                eptr+= len;
4834                }
4835              break;
4836    
4837              case PT_PXSPACE:  /* POSIX space */
4838              for (i = min; i < max; i++)
4839                {
4840                int len = 1;
4841                if (eptr >= md->end_subject)
4842                  {
4843                  SCHECK_PARTIAL();
4844                  break;
4845                  }
4846                GETCHARLENTEST(c, eptr, len);
4847                prop_category = UCD_CATEGORY(c);
4848                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4849                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4850                     == prop_fail_result)
4851                  break;
4852                eptr+= len;
4853                }
4854              break;
4855    
4856              case PT_WORD:
4857              for (i = min; i < max; i++)
4858                {
4859                int len = 1;
4860                if (eptr >= md->end_subject)
4861                  {
4862                  SCHECK_PARTIAL();
4863                  break;
4864                  }
4865                GETCHARLENTEST(c, eptr, len);
4866                prop_category = UCD_CATEGORY(c);
4867                if ((prop_category == ucp_L || prop_category == ucp_N ||
4868                     c == CHAR_UNDERSCORE) == prop_fail_result)
4869                  break;
4870                eptr+= len;
4871                }
4872              break;
4873    
4874              default:
4875              RRETURN(PCRE_ERROR_INTERNAL);
4876            }            }
4877    
4878          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
# Line 5133  switch (frame->Xwhere) Line 5458  switch (frame->Xwhere)
5458    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5459    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5460    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5461    LBL(53) LBL(54)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5462  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5463    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5464    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
5465  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5466    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5467      LBL(59) LBL(60) LBL(61) LBL(62)
5468  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
5469  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
5470    default:    default:
# Line 5342  end_subject = md->end_subject; Line 5668  end_subject = md->end_subject;
5668    
5669  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5670  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5671    md->use_ucp = (re->options & PCRE_UCP) != 0;
5672  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5673    
5674  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
# Line 5722  for(;;) Line 6049  for(;;)
6049    
6050    /* OK, we can now run the match. If "hitend" is set afterwards, remember the    /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6051    first starting point for which a partial match was found. */    first starting point for which a partial match was found. */
6052    
6053    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
6054    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
6055    md->match_call_count = 0;    md->match_call_count = 0;
# Line 5732  for(;;) Line 6059  for(;;)
6059    
6060    switch(rc)    switch(rc)
6061      {      {
6062      /* NOMATCH and PRUNE advance by one character. If MATCH_SKIP_ARG reaches      /* NOMATCH and PRUNE advance by one character. If MATCH_SKIP_ARG reaches
6063      this level it means that a MARK that matched the SKIP's arg was not found.      this level it means that a MARK that matched the SKIP's arg was not found.
6064      We treat this as NOMATCH. THEN at this level acts exactly like PRUNE. */      We treat this as NOMATCH. THEN at this level acts exactly like PRUNE. */
6065    
6066      case MATCH_NOMATCH:      case MATCH_NOMATCH:
6067      case MATCH_PRUNE:      case MATCH_PRUNE:
6068      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
6069      case MATCH_THEN:      case MATCH_THEN:
6070      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6071  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 5850  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6177  if (rc == MATCH_MATCH || rc == MATCH_ACC
6177    
6178    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
6179      {      {
6180      offsets[0] = md->start_match_ptr - md->start_subject;      offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6181      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6182      }      }
6183    
6184    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
# Line 5866  if (using_temporary_offsets) Line 6193  if (using_temporary_offsets)
6193    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
6194    (pcre_free)(md->offset_vector);    (pcre_free)(md->offset_vector);
6195    }    }
6196    
6197  /* For anything other than nomatch or partial match, just return the code. */  /* For anything other than nomatch or partial match, just return the code. */
6198    
6199  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6200    {    {
6201    DPRINTF((">>>> error: returning %d\n", rc));    DPRINTF((">>>> error: returning %d\n", rc));
6202    return rc;    return rc;
6203    }    }
6204    
6205  /* Handle partial matches - disable any mark data */  /* Handle partial matches - disable any mark data */
6206    
6207  if (start_partial != NULL)  if (start_partial != NULL)
6208    {    {
6209    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6210    md->mark = NULL;    md->mark = NULL;
6211    if (offsetcount > 1)    if (offsetcount > 1)
6212      {      {
6213      offsets[0] = start_partial - (USPTR)subject;      offsets[0] = (int)(start_partial - (USPTR)subject);
6214      offsets[1] = end_subject - (USPTR)subject;      offsets[1] = (int)(end_subject - (USPTR)subject);
6215      }      }
6216    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6217    }    }
6218    
6219  /* This is the classic nomatch case */  /* This is the classic nomatch case */
6220    
6221  else  else
6222    {    {
6223    DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));    DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6224    rc = PCRE_ERROR_NOMATCH;    rc = PCRE_ERROR_NOMATCH;
6225    }    }
6226    
6227  /* Return the MARK data if it has been requested. */  /* Return the MARK data if it has been requested. */
6228    
6229  RETURN_MARK:  RETURN_MARK:
6230    
6231  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6232    *(extra_data->mark) = (unsigned char *)(md->mark);    *(extra_data->mark) = (unsigned char *)(md->mark);
6233  return rc;  return rc;
6234  }  }
6235    
6236  /* End of pcre_exec.c */  /* End of pcre_exec.c */

Legend:
Removed from v.511  
changed lines
  Added in v.530

  ViewVC Help
Powered by ViewVC 1.1.5