663 |
{ |
{ |
664 |
minimize = possessive = FALSE; |
minimize = possessive = FALSE; |
665 |
op = *ecode; |
op = *ecode; |
666 |
|
|
667 |
switch(op) |
switch(op) |
668 |
{ |
{ |
669 |
case OP_FAIL: |
case OP_FAIL: |
934 |
set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of |
set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of |
935 |
the subject. In both cases, backtracking will then try other alternatives, |
the subject. In both cases, backtracking will then try other alternatives, |
936 |
if any. */ |
if any. */ |
937 |
|
|
938 |
if (eptr == mstart && |
if (eptr == mstart && |
939 |
(md->notempty || |
(md->notempty || |
940 |
(md->notempty_atstart && |
(md->notempty_atstart && |
941 |
mstart == md->start_subject + md->start_offset))) |
mstart == md->start_subject + md->start_offset))) |
942 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
943 |
|
|
944 |
/* Otherwise, we have a match. */ |
/* Otherwise, we have a match. */ |
945 |
|
|
946 |
md->end_match_ptr = eptr; /* Record where we ended */ |
md->end_match_ptr = eptr; /* Record where we ended */ |
1478 |
|
|
1479 |
/* Find out if the previous and current characters are "word" characters. |
/* Find out if the previous and current characters are "word" characters. |
1480 |
It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to |
It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to |
1481 |
be "non-word" characters. Remember the earliest consulted character for |
be "non-word" characters. Remember the earliest consulted character for |
1482 |
partial matching. */ |
partial matching. */ |
1483 |
|
|
1484 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1488 |
{ |
{ |
1489 |
USPTR lastptr = eptr - 1; |
USPTR lastptr = eptr - 1; |
1490 |
while((*lastptr & 0xc0) == 0x80) lastptr--; |
while((*lastptr & 0xc0) == 0x80) lastptr--; |
1491 |
if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; |
if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; |
1492 |
GETCHAR(c, lastptr); |
GETCHAR(c, lastptr); |
1493 |
prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; |
prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; |
1494 |
} |
} |
1495 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1496 |
{ |
{ |
1497 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1498 |
cur_is_word = FALSE; |
cur_is_word = FALSE; |
1499 |
} |
} |
1500 |
else |
else |
1501 |
{ |
{ |
1511 |
{ |
{ |
1512 |
if (eptr == md->start_subject) prev_is_word = FALSE; else |
if (eptr == md->start_subject) prev_is_word = FALSE; else |
1513 |
{ |
{ |
1514 |
if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1; |
if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1; |
1515 |
prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0); |
prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0); |
1516 |
} |
} |
1517 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1518 |
{ |
{ |
1519 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1520 |
cur_is_word = FALSE; |
cur_is_word = FALSE; |
1521 |
} |
} |
1522 |
else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0); |
else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0); |
1523 |
} |
} |
1537 |
/* Fall through */ |
/* Fall through */ |
1538 |
|
|
1539 |
case OP_ALLANY: |
case OP_ALLANY: |
1540 |
if (eptr++ >= md->end_subject) |
if (eptr++ >= md->end_subject) |
1541 |
{ |
{ |
1542 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1543 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1544 |
} |
} |
1545 |
if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
1546 |
ecode++; |
ecode++; |
1547 |
break; |
break; |
1550 |
any byte, even newline, independent of the setting of PCRE_DOTALL. */ |
any byte, even newline, independent of the setting of PCRE_DOTALL. */ |
1551 |
|
|
1552 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
1553 |
if (eptr++ >= md->end_subject) |
if (eptr++ >= md->end_subject) |
1554 |
{ |
{ |
1555 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1556 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1557 |
} |
} |
1558 |
ecode++; |
ecode++; |
1559 |
break; |
break; |
1560 |
|
|
1561 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
1562 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1563 |
{ |
{ |
1564 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1565 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1566 |
} |
} |
1567 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1568 |
if ( |
if ( |
1569 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1576 |
break; |
break; |
1577 |
|
|
1578 |
case OP_DIGIT: |
case OP_DIGIT: |
1579 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1580 |
{ |
{ |
1581 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1582 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1583 |
} |
} |
1584 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1585 |
if ( |
if ( |
1586 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1593 |
break; |
break; |
1594 |
|
|
1595 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
1596 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1597 |
{ |
{ |
1598 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1599 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1600 |
} |
} |
1601 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1602 |
if ( |
if ( |
1603 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1610 |
break; |
break; |
1611 |
|
|
1612 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
1613 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1614 |
{ |
{ |
1615 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1616 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1617 |
} |
} |
1618 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1619 |
if ( |
if ( |
1620 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1627 |
break; |
break; |
1628 |
|
|
1629 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
1630 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1631 |
{ |
{ |
1632 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1633 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1634 |
} |
} |
1635 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1636 |
if ( |
if ( |
1637 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1644 |
break; |
break; |
1645 |
|
|
1646 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
1647 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1648 |
{ |
{ |
1649 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1650 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1651 |
} |
} |
1652 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1653 |
if ( |
if ( |
1654 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1661 |
break; |
break; |
1662 |
|
|
1663 |
case OP_ANYNL: |
case OP_ANYNL: |
1664 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1665 |
{ |
{ |
1666 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1667 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1668 |
} |
} |
1669 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1670 |
switch(c) |
switch(c) |
1671 |
{ |
{ |
1689 |
break; |
break; |
1690 |
|
|
1691 |
case OP_NOT_HSPACE: |
case OP_NOT_HSPACE: |
1692 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1693 |
{ |
{ |
1694 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1695 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1696 |
} |
} |
1697 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1698 |
switch(c) |
switch(c) |
1699 |
{ |
{ |
1723 |
break; |
break; |
1724 |
|
|
1725 |
case OP_HSPACE: |
case OP_HSPACE: |
1726 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1727 |
{ |
{ |
1728 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1729 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1730 |
} |
} |
1731 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1732 |
switch(c) |
switch(c) |
1733 |
{ |
{ |
1757 |
break; |
break; |
1758 |
|
|
1759 |
case OP_NOT_VSPACE: |
case OP_NOT_VSPACE: |
1760 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1761 |
{ |
{ |
1762 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1763 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1764 |
} |
} |
1765 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1766 |
switch(c) |
switch(c) |
1767 |
{ |
{ |
1779 |
break; |
break; |
1780 |
|
|
1781 |
case OP_VSPACE: |
case OP_VSPACE: |
1782 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1783 |
{ |
{ |
1784 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1785 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1786 |
} |
} |
1787 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1788 |
switch(c) |
switch(c) |
1789 |
{ |
{ |
1806 |
|
|
1807 |
case OP_PROP: |
case OP_PROP: |
1808 |
case OP_NOTPROP: |
case OP_NOTPROP: |
1809 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1810 |
{ |
{ |
1811 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1812 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1813 |
} |
} |
1814 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1815 |
{ |
{ |
1816 |
const ucd_record *prop = GET_UCD(c); |
const ucd_record *prop = GET_UCD(c); |
1855 |
is in the binary; otherwise a compile-time error occurs. */ |
is in the binary; otherwise a compile-time error occurs. */ |
1856 |
|
|
1857 |
case OP_EXTUNI: |
case OP_EXTUNI: |
1858 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
1859 |
{ |
{ |
1860 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
1861 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1862 |
} |
} |
1863 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
1864 |
{ |
{ |
1865 |
int category = UCD_CATEGORY(c); |
int category = UCD_CATEGORY(c); |
1939 |
break; |
break; |
1940 |
|
|
1941 |
default: /* No repeat follows */ |
default: /* No repeat follows */ |
1942 |
if (!match_ref(offset, eptr, length, md, ims)) |
if (!match_ref(offset, eptr, length, md, ims)) |
1943 |
{ |
{ |
1944 |
CHECK_PARTIAL(); |
CHECK_PARTIAL(); |
1945 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
1946 |
} |
} |
1947 |
eptr += length; |
eptr += length; |
1948 |
continue; /* With the main loop */ |
continue; /* With the main loop */ |
1949 |
} |
} |
1950 |
|
|
1951 |
/* If the length of the reference is zero, just continue with the |
/* If the length of the reference is zero, just continue with the |
1952 |
main loop. */ |
main loop. */ |
1953 |
|
|
1954 |
if (length == 0) continue; |
if (length == 0) continue; |
1955 |
|
|
1956 |
/* First, ensure the minimum number of matches are present. We get back |
/* First, ensure the minimum number of matches are present. We get back |
2326 |
length = 1; |
length = 1; |
2327 |
ecode++; |
ecode++; |
2328 |
GETCHARLEN(fc, ecode, length); |
GETCHARLEN(fc, ecode, length); |
2329 |
if (length > md->end_subject - eptr) |
if (length > md->end_subject - eptr) |
2330 |
{ |
{ |
2331 |
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ |
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ |
2332 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
2333 |
} |
} |
2334 |
while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); |
while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); |
2335 |
} |
} |
2336 |
else |
else |
2338 |
|
|
2339 |
/* Non-UTF-8 mode */ |
/* Non-UTF-8 mode */ |
2340 |
{ |
{ |
2341 |
if (md->end_subject - eptr < 1) |
if (md->end_subject - eptr < 1) |
2342 |
{ |
{ |
2343 |
SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ |
SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ |
2344 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
2345 |
} |
} |
2346 |
if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); |
if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); |
2347 |
ecode += 2; |
ecode += 2; |
2348 |
} |
} |
2358 |
ecode++; |
ecode++; |
2359 |
GETCHARLEN(fc, ecode, length); |
GETCHARLEN(fc, ecode, length); |
2360 |
|
|
2361 |
if (length > md->end_subject - eptr) |
if (length > md->end_subject - eptr) |
2362 |
{ |
{ |
2363 |
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ |
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ |
2364 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
2365 |
} |
} |
2366 |
|
|
2367 |
/* If the pattern character's value is < 128, we have only one byte, and |
/* If the pattern character's value is < 128, we have only one byte, and |
2368 |
can use the fast lookup table. */ |
can use the fast lookup table. */ |
2397 |
|
|
2398 |
/* Non-UTF-8 mode */ |
/* Non-UTF-8 mode */ |
2399 |
{ |
{ |
2400 |
if (md->end_subject - eptr < 1) |
if (md->end_subject - eptr < 1) |
2401 |
{ |
{ |
2402 |
SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ |
SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ |
2403 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
2404 |
} |
} |
2405 |
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); |
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); |
2406 |
ecode += 2; |
ecode += 2; |
2407 |
} |
} |
2455 |
case OP_MINQUERY: |
case OP_MINQUERY: |
2456 |
c = *ecode++ - OP_STAR; |
c = *ecode++ - OP_STAR; |
2457 |
minimize = (c & 1) != 0; |
minimize = (c & 1) != 0; |
2458 |
|
|
2459 |
min = rep_min[c]; /* Pick up values from tables; */ |
min = rep_min[c]; /* Pick up values from tables; */ |
2460 |
max = rep_max[c]; /* zero for max => infinity */ |
max = rep_max[c]; /* zero for max => infinity */ |
2461 |
if (max == 0) max = INT_MAX; |
if (max == 0) max = INT_MAX; |
2568 |
/* When not in UTF-8 mode, load a single-byte character. */ |
/* When not in UTF-8 mode, load a single-byte character. */ |
2569 |
|
|
2570 |
fc = *ecode++; |
fc = *ecode++; |
2571 |
|
|
2572 |
/* The value of fc at this point is always less than 256, though we may or |
/* The value of fc at this point is always less than 256, though we may or |
2573 |
may not be in UTF-8 mode. The code is duplicated for the caseless and |
may not be in UTF-8 mode. The code is duplicated for the caseless and |
2574 |
caseful cases, for speed, since matching characters is likely to be quite |
caseful cases, for speed, since matching characters is likely to be quite |
2645 |
} |
} |
2646 |
if (fc != *eptr++) RRETURN(MATCH_NOMATCH); |
if (fc != *eptr++) RRETURN(MATCH_NOMATCH); |
2647 |
} |
} |
2648 |
|
|
2649 |
if (min == max) continue; |
if (min == max) continue; |
2650 |
|
|
2651 |
if (minimize) |
if (minimize) |
2652 |
{ |
{ |
2653 |
for (fi = min;; fi++) |
for (fi = min;; fi++) |
2673 |
eptr++; |
eptr++; |
2674 |
} |
} |
2675 |
if (possessive) continue; |
if (possessive) continue; |
2676 |
|
|
2677 |
while (eptr >= pp) |
while (eptr >= pp) |
2678 |
{ |
{ |
2679 |
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27); |
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27); |
2689 |
checking can be multibyte. */ |
checking can be multibyte. */ |
2690 |
|
|
2691 |
case OP_NOT: |
case OP_NOT: |
2692 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
2693 |
{ |
{ |
2694 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
2695 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
2696 |
} |
} |
2697 |
ecode++; |
ecode++; |
2698 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2699 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3539 |
break; |
break; |
3540 |
|
|
3541 |
case OP_ALLANY: |
case OP_ALLANY: |
3542 |
if (eptr > md->end_subject - min) |
if (eptr > md->end_subject - min) |
3543 |
{ |
{ |
3544 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
3545 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
3546 |
} |
} |
3547 |
eptr += min; |
eptr += min; |
3548 |
break; |
break; |
3549 |
|
|
3550 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
3551 |
if (eptr > md->end_subject - min) |
if (eptr > md->end_subject - min) |
3552 |
{ |
{ |
3553 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
3554 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
3555 |
} |
} |
3556 |
eptr += min; |
eptr += min; |
3557 |
break; |
break; |
3558 |
|
|
5293 |
first starting point for which a partial match was found. */ |
first starting point for which a partial match was found. */ |
5294 |
|
|
5295 |
md->start_match_ptr = start_match; |
md->start_match_ptr = start_match; |
5296 |
md->start_used_ptr = start_match; |
md->start_used_ptr = start_match; |
5297 |
md->match_call_count = 0; |
md->match_call_count = 0; |
5298 |
rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0); |
rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0); |
5299 |
if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr; |
if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr; |